diff mbox series

[08/13] drm/i915: Allow a context to define its set of engines

Message ID 20190308141244.16837-9-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/13] drm/i915: Suppress the "Failed to idle" warning for gem_eio | expand

Commit Message

Chris Wilson March 8, 2019, 2:12 p.m. UTC
Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. Its use will be revealed in the next patch.

v2: Fixup freeing of local on success of get_engines()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 204 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  22 +-
 include/uapi/drm/i915_drm.h                   |  42 +++-
 4 files changed, 259 insertions(+), 13 deletions(-)

Comments

Tvrtko Ursulin March 8, 2019, 4:27 p.m. UTC | #1
On 08/03/2019 14:12, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. It's use will be revealed in the next patch.
> 
> v2: Fixup freeing of local on success of get_engines()
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       | 204 +++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  22 +-
>   include/uapi/drm/i915_drm.h                   |  42 +++-
>   4 files changed, 259 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 2cfc68b66944..86d9bea6f275 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -101,6 +101,21 @@ static struct i915_global_gem_context {
>   	struct kmem_cache *slab_luts;
>   } global;
>   
> +static struct intel_engine_cs *
> +lookup_user_engine(struct i915_gem_context *ctx,
> +		   unsigned long flags, u16 class, u16 instance)
> +#define LOOKUP_USER_INDEX BIT(0)
> +{
> +	if (flags & LOOKUP_USER_INDEX) {
> +		if (instance >= ctx->nengine)
> +			return NULL;
> +
> +		return ctx->engines[instance];
> +	}
> +
> +	return intel_engine_lookup_user(ctx->i915, class, instance);
> +}
> +
>   struct i915_lut_handle *i915_lut_handle_alloc(void)
>   {
>   	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> @@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	release_hw_id(ctx);
>   	i915_ppgtt_put(ctx->ppgtt);
>   
> +	kfree(ctx->engines);
> +
>   	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>   		it->ops->destroy(it);
>   
> @@ -1311,9 +1328,9 @@ static int set_sseu(struct i915_gem_context *ctx,
>   	if (user_sseu.flags || user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = intel_engine_lookup_user(i915,
> -					  user_sseu.engine_class,
> -					  user_sseu.engine_instance);
> +	engine = lookup_user_engine(ctx, 0,
> +				    user_sseu.engine_class,
> +				    user_sseu.engine_instance);
>   	if (!engine)
>   		return -EINVAL;
>   
> @@ -1331,9 +1348,154 @@ static int set_sseu(struct i915_gem_context *ctx,
>   
>   	args->size = sizeof(user_sseu);
>   
> +	return 0;
> +};
> +
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};
> +
> +static int
> +set_engines(struct i915_gem_context *ctx,
> +	    const struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct set_engines set = { .ctx = ctx };
> +	u64 size, extensions;
> +	unsigned int n;
> +	int err;
> +
> +	user = u64_to_user_ptr(args->value);
> +	size = args->size;
> +	if (!size)
> +		goto out;

This prevents a hypothetical extension with empty map data.

> +
> +	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> +	if (size < sizeof(*user) || size % sizeof(*user->class_instance))

IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?

> +		return -EINVAL;
> +
> +	set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> +	if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)

I would prefer we drop the size restriction since it doesn't apply to 
the engine map per se.

> +		return -EINVAL;
> +
> +	set.engines = kmalloc_array(set.nengine,
> +				    sizeof(*set.engines),
> +				    GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	for (n = 0; n < set.nengine; n++) {
> +		u16 class, inst;
> +
> +		if (get_user(class, &user->class_instance[n].engine_class) ||
> +		    get_user(inst, &user->class_instance[n].engine_instance)) {
> +			kfree(set.engines);
> +			return -EFAULT;
> +		}
> +
> +		if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> +		    inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> +			set.engines[n] = NULL;
> +			continue;
> +		}
> +
> +		set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> +		if (!set.engines[n]) {
> +			kfree(set.engines);
> +			return -ENOENT;
> +		}
> +	}
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {
> +		kfree(set.engines);
> +		return err;
> +	}
> +
> +out:
> +	mutex_lock(&ctx->i915->drm.struct_mutex);
> +	kfree(ctx->engines);
> +	ctx->engines = set.engines;
> +	ctx->nengine = set.nengine;
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
>   	return 0;
>   }
>   
> +static int
> +get_engines(struct i915_gem_context *ctx,
> +	    struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines *local;
> +	unsigned int n, count, size;
> +	int err = 0;
> +
> +restart:
> +	count = READ_ONCE(ctx->nengine);
> +	if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
> +		return -ENOMEM; /* unrepresentable! */

Probably overly paranoid since we can't end up with this state set.

> +
> +	size = sizeof(*local) + count * sizeof(*local->class_instance);
> +	if (!args->size) {
> +		args->size = size;
> +		return 0;
> +	}
> +	if (args->size < size)
> +		return -EINVAL;
> +
> +	local = kmalloc(size, GFP_KERNEL);
> +	if (!local)
> +		return -ENOMEM;
> +
> +	if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
> +		err = -EINTR;
> +		goto out;
> +	}
> +
> +	if (READ_ONCE(ctx->nengine) != count) {
> +		mutex_unlock(&ctx->i915->drm.struct_mutex);
> +		kfree(local);
> +		goto restart;
> +	}
> +
> +	local->extensions = 0;
> +	for (n = 0; n < count; n++) {
> +		if (ctx->engines[n]) {
> +			local->class_instance[n].engine_class =
> +				ctx->engines[n]->uabi_class;
> +			local->class_instance[n].engine_instance =
> +				ctx->engines[n]->instance;
> +		} else {
> +			local->class_instance[n].engine_class =
> +				I915_ENGINE_CLASS_INVALID;
> +			local->class_instance[n].engine_instance =
> +				I915_ENGINE_CLASS_INVALID_NONE;
> +		}
> +	}
> +
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
> +		err = -EFAULT;
> +		goto out;
> +	}
> +	args->size = size;
> +
> +out:
> +	kfree(local);
> +	return err;
> +}
> +
>   static int ctx_setparam(struct i915_gem_context *ctx,
>   			struct drm_i915_gem_context_param *args)
>   {
> @@ -1406,6 +1568,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
>   		ret = set_ppgtt(ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> @@ -1459,6 +1625,22 @@ static int clone_sseu(struct i915_gem_context *dst,
>   	return 0;
>   }
>   
> +static int clone_engines(struct i915_gem_context *dst,
> +			 struct i915_gem_context *src)
> +{
> +	struct intel_engine_cs **engines;
> +
> +	engines = kmemdup(src->engines,
> +			  sizeof(*src->engines) * src->nengine,
> +			  GFP_KERNEL);
> +	if (!engines)
> +		return -ENOMEM;
> +
> +	dst->engines = engines;
> +	dst->nengine = src->nengine;
> +	return 0;
> +}
> +
>   static int create_clone(struct i915_user_extension __user *ext, void *data)
>   {
>   	struct drm_i915_gem_context_create_ext_clone local;
> @@ -1514,6 +1696,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
>   		i915_ppgtt_open(dst->ppgtt);
>   	}
>   
> +	if (local.flags & I915_CONTEXT_CLONE_ENGINES && src->nengine) {
> +		err = clone_engines(dst, src);
> +		if (err)
> +			return err;
> +	}
> +
>   	return 0;
>   }
>   
> @@ -1632,9 +1820,9 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	if (user_sseu.flags || user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = intel_engine_lookup_user(ctx->i915,
> -					  user_sseu.engine_class,
> -					  user_sseu.engine_instance);
> +	engine = lookup_user_engine(ctx, 0,
> +				    user_sseu.engine_class,
> +				    user_sseu.engine_instance);
>   	if (!engine)
>   		return -EINVAL;
>   
> @@ -1715,6 +1903,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   		ret = get_ppgtt(ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = get_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index f8f6e6c960a7..8a89f3053f73 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -41,6 +41,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct intel_engine_cs **engines;
> +
>   	struct i915_timeline *timeline;
>   
>   	/**
> @@ -110,6 +112,8 @@ struct i915_gem_context {
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   
> +	unsigned int nengine;
> +
>   	/**
>   	 * @hw_id: - unique identifier for the context
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ee6d301a9627..67e4a0c2ebff 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2090,13 +2090,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>   };
>   
>   static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
>   		 struct drm_file *file,
>   		 struct drm_i915_gem_execbuffer2 *args)
>   {
>   	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>   	struct intel_engine_cs *engine;
>   
> +	if (eb->ctx->engines) {
> +		if (user_ring_id >= eb->ctx->nengine) {
> +			DRM_DEBUG("execbuf with unknown ring: %u\n",
> +				  user_ring_id);
> +			return NULL;
> +		}
> +
> +		return eb->ctx->engines[user_ring_id];
> +	}
> +
>   	if (user_ring_id > I915_USER_RINGS) {
>   		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>   		return NULL;
> @@ -2109,11 +2119,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   		return NULL;
>   	}
>   
> -	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
> +	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
>   		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>   
>   		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> -			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> +			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>   		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>   			   bsd_idx <= I915_EXEC_BSD_RING2) {
>   			bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2124,9 +2134,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   			return NULL;
>   		}
>   
> -		engine = dev_priv->engine[_VCS(bsd_idx)];
> +		engine = eb->i915->engine[_VCS(bsd_idx)];
>   	} else {
> -		engine = dev_priv->engine[user_ring_map[user_ring_id]];
> +		engine = eb->i915->engine[user_ring_map[user_ring_id]];
>   	}
>   
>   	if (!engine) {
> @@ -2336,7 +2346,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_destroy;
>   
> -	eb.engine = eb_select_engine(eb.i915, file, args);
> +	eb.engine = eb_select_engine(&eb, file, args);
>   	if (!eb.engine) {
>   		err = -EINVAL;
>   		goto err_engine;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 50d154954d5f..00147b990e63 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -124,6 +124,8 @@ enum drm_i915_gem_engine_class {
>   	I915_ENGINE_CLASS_INVALID	= -1
>   };
>   
> +#define I915_ENGINE_CLASS_INVALID_NONE -1
> +
>   /**
>    * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
>    *
> @@ -1509,6 +1511,26 @@ struct drm_i915_gem_context_param {
>   	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
>   	 */
>   #define I915_CONTEXT_PARAM_VM		0x9
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. Slots 0...N are filled in using the specified (class, instance).
> + * Use
> + *	engine_class: I915_ENGINE_CLASS_INVALID,
> + *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
> + * to specify a gap in the array that can be filled in later, e.g. by a
> + * virtual engine used for load balancing.
> + *
> + * Setting the number of engines bound to the context to 0, by passing a zero
> + * sized argument, will revert back to default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
>   
>   	__u64 value;
> @@ -1573,6 +1595,23 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +
> +	struct {
> +		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
> +		__u16 engine_instance;
> +	} class_instance[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
> +	__u64 extensions; \
> +	struct { \
> +		__u16 engine_class; \
> +		__u16 engine_instance; \
> +	} class_instance[N__]; \
> +} __attribute__((packed)) name__
> +
>   struct drm_i915_gem_context_create_ext_setparam {
>   #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
>   	struct i915_user_extension base;
> @@ -1589,7 +1628,8 @@ struct drm_i915_gem_context_create_ext_clone {
>   #define I915_CONTEXT_CLONE_SSEU		(1u << 2)
>   #define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
>   #define I915_CONTEXT_CLONE_VM		(1u << 4)
> -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> +#define I915_CONTEXT_CLONE_ENGINES	(1u << 5)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
>   	__u64 rsvd;
>   };
>   
> 

Regards,

Tvrtko
Chris Wilson March 8, 2019, 4:47 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
> 
> On 08/03/2019 14:12, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context to define the set of engines that it can access, order fully
> > controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> > 
> > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> > execbuf. It's use will be revealed in the next patch.
> > 
> > v2: Fixup freeing of local on success of get_engines()
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c       | 204 +++++++++++++++++-
> >   drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  22 +-
> >   include/uapi/drm/i915_drm.h                   |  42 +++-
> >   4 files changed, 259 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 2cfc68b66944..86d9bea6f275 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -101,6 +101,21 @@ static struct i915_global_gem_context {
> >       struct kmem_cache *slab_luts;
> >   } global;
> >   
> > +static struct intel_engine_cs *
> > +lookup_user_engine(struct i915_gem_context *ctx,
> > +                unsigned long flags, u16 class, u16 instance)
> > +#define LOOKUP_USER_INDEX BIT(0)
> > +{
> > +     if (flags & LOOKUP_USER_INDEX) {
> > +             if (instance >= ctx->nengine)
> > +                     return NULL;
> > +
> > +             return ctx->engines[instance];
> > +     }
> > +
> > +     return intel_engine_lookup_user(ctx->i915, class, instance);
> > +}
> > +
> >   struct i915_lut_handle *i915_lut_handle_alloc(void)
> >   {
> >       return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> > @@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >       release_hw_id(ctx);
> >       i915_ppgtt_put(ctx->ppgtt);
> >   
> > +     kfree(ctx->engines);
> > +
> >       rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> >               it->ops->destroy(it);
> >   
> > @@ -1311,9 +1328,9 @@ static int set_sseu(struct i915_gem_context *ctx,
> >       if (user_sseu.flags || user_sseu.rsvd)
> >               return -EINVAL;
> >   
> > -     engine = intel_engine_lookup_user(i915,
> > -                                       user_sseu.engine_class,
> > -                                       user_sseu.engine_instance);
> > +     engine = lookup_user_engine(ctx, 0,
> > +                                 user_sseu.engine_class,
> > +                                 user_sseu.engine_instance);
> >       if (!engine)
> >               return -EINVAL;
> >   
> > @@ -1331,9 +1348,154 @@ static int set_sseu(struct i915_gem_context *ctx,
> >   
> >       args->size = sizeof(user_sseu);
> >   
> > +     return 0;
> > +};
> > +
> > +struct set_engines {
> > +     struct i915_gem_context *ctx;
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> > +
> > +static int
> > +set_engines(struct i915_gem_context *ctx,
> > +         const struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct set_engines set = { .ctx = ctx };
> > +     u64 size, extensions;
> > +     unsigned int n;
> > +     int err;
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     size = args->size;
> > +     if (!size)
> > +             goto out;
> 
> This prevents a hypothetical extension with empty map data.

No... This is required for resetting and I think that's covered in what
little docs there are. It's the set.nengine==0 test later
that you mean to object to. But we can't do that as that's how we
differentiate between modes at the moment.

We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.

> > +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> > +     if (size < sizeof(*user) || size % sizeof(*user->class_instance))
> 
> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
> 
> > +             return -EINVAL;
> > +
> > +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> > +     if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
> 
> I would prefer we drop the size restriction since it doesn't apply to 
> the engine map per se.

u64 is a limit that will be non-trivial to lift. Marking the limits of
the kernel doesn't restrict it being lifted later.

> > +             return -EINVAL;
> > +
> > +     set.engines = kmalloc_array(set.nengine,
> > +                                 sizeof(*set.engines),
> > +                                 GFP_KERNEL);
> > +     if (!set.engines)
> > +             return -ENOMEM;
> > +
> > +     for (n = 0; n < set.nengine; n++) {
> > +             u16 class, inst;
> > +
> > +             if (get_user(class, &user->class_instance[n].engine_class) ||
> > +                 get_user(inst, &user->class_instance[n].engine_instance)) {
> > +                     kfree(set.engines);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> > +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> > +                     set.engines[n] = NULL;
> > +                     continue;
> > +             }
> > +
> > +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> > +             if (!set.engines[n]) {
> > +                     kfree(set.engines);
> > +                     return -ENOENT;
> > +             }
> > +     }
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> > +             kfree(set.engines);
> > +             return err;
> > +     }
> > +
> > +out:
> > +     mutex_lock(&ctx->i915->drm.struct_mutex);
> > +     kfree(ctx->engines);
> > +     ctx->engines = set.engines;
> > +     ctx->nengine = set.nengine;
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> >       return 0;
> >   }
> >   
> > +static int
> > +get_engines(struct i915_gem_context *ctx,
> > +         struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines *local;
> > +     unsigned int n, count, size;
> > +     int err = 0;
> > +
> > +restart:
> > +     count = READ_ONCE(ctx->nengine);
> > +     if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
> > +             return -ENOMEM; /* unrepresentable! */
> 
> Probably overly paranoid since we can't end up with this state set.

And I thought you wanted many engines! Paranoia around kmalloc/user
overflows is always useful, because you know someone will send a patch
later (and smatch doesn't really care as it only checks the limits of
types and local constraints).
-Chris
Tvrtko Ursulin March 11, 2019, 9:23 a.m. UTC | #3
On 08/03/2019 16:47, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>>
>> On 08/03/2019 14:12, Chris Wilson wrote:
>>> Over the last few years, we have debated how to extend the user API to
>>> support an increase in the number of engines, that may be sparse and
>>> even be heterogeneous within a class (not all video decoders created
>>> equal). We settled on using (class, instance) tuples to identify a
>>> specific engine, with an API for the user to construct a map of engines
>>> to capabilities. Into this picture, we then add a challenge of virtual
>>> engines; one user engine that maps behind the scenes to any number of
>>> physical engines. To keep it general, we want the user to have full
>>> control over that mapping. To that end, we allow the user to constrain a
>>> context to define the set of engines that it can access, order fully
>>> controlled by the user via (class, instance). With such precise control
>>> in context setup, we can continue to use the existing execbuf uABI of
>>> specifying a single index; only now it doesn't automagically map onto
>>> the engines, it uses the user defined engine map from the context.
>>>
>>> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
>>> execbuf. It's use will be revealed in the next patch.
>>>
>>> v2: Fixup freeing of local on success of get_engines()
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c       | 204 +++++++++++++++++-
>>>    drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
>>>    drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  22 +-
>>>    include/uapi/drm/i915_drm.h                   |  42 +++-
>>>    4 files changed, 259 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 2cfc68b66944..86d9bea6f275 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -101,6 +101,21 @@ static struct i915_global_gem_context {
>>>        struct kmem_cache *slab_luts;
>>>    } global;
>>>    
>>> +static struct intel_engine_cs *
>>> +lookup_user_engine(struct i915_gem_context *ctx,
>>> +                unsigned long flags, u16 class, u16 instance)
>>> +#define LOOKUP_USER_INDEX BIT(0)
>>> +{
>>> +     if (flags & LOOKUP_USER_INDEX) {
>>> +             if (instance >= ctx->nengine)
>>> +                     return NULL;
>>> +
>>> +             return ctx->engines[instance];
>>> +     }
>>> +
>>> +     return intel_engine_lookup_user(ctx->i915, class, instance);
>>> +}
>>> +
>>>    struct i915_lut_handle *i915_lut_handle_alloc(void)
>>>    {
>>>        return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
>>> @@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>>>        release_hw_id(ctx);
>>>        i915_ppgtt_put(ctx->ppgtt);
>>>    
>>> +     kfree(ctx->engines);
>>> +
>>>        rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>>>                it->ops->destroy(it);
>>>    
>>> @@ -1311,9 +1328,9 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>        if (user_sseu.flags || user_sseu.rsvd)
>>>                return -EINVAL;
>>>    
>>> -     engine = intel_engine_lookup_user(i915,
>>> -                                       user_sseu.engine_class,
>>> -                                       user_sseu.engine_instance);
>>> +     engine = lookup_user_engine(ctx, 0,
>>> +                                 user_sseu.engine_class,
>>> +                                 user_sseu.engine_instance);
>>>        if (!engine)
>>>                return -EINVAL;
>>>    
>>> @@ -1331,9 +1348,154 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>    
>>>        args->size = sizeof(user_sseu);
>>>    
>>> +     return 0;
>>> +};
>>> +
>>> +struct set_engines {
>>> +     struct i915_gem_context *ctx;
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +};
>>> +
>>> +static const i915_user_extension_fn set_engines__extensions[] = {
>>> +};
>>> +
>>> +static int
>>> +set_engines(struct i915_gem_context *ctx,
>>> +         const struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines __user *user;
>>> +     struct set_engines set = { .ctx = ctx };
>>> +     u64 size, extensions;
>>> +     unsigned int n;
>>> +     int err;
>>> +
>>> +     user = u64_to_user_ptr(args->value);
>>> +     size = args->size;
>>> +     if (!size)
>>> +             goto out;
>>
>> This prevents a hypothetical extension with empty map data.
> 
> No... This is required for resetting and I think that's covered in what
> little docs there are. It's the set.nengine==0 test later
> that you mean to object to. But we can't do that as that's how we
> differentiate between modes at the moment.
> 
> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.

size == sizeof(struct i915_context_param_engines) could mean reset - 
meaning no map array provided.

Meaning one could reset the map and still pass in extensions.

> 
>>> +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
>>> +     if (size < sizeof(*user) || size % sizeof(*user->class_instance))
>>
>> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
>>
>>> +             return -EINVAL;
>>> +
>>> +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
>>> +     if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
>>
>> I would prefer we drop the size restriction since it doesn't apply to
>> the engine map per se.
> 
> u64 is a limit that will be non-trivial to lift. Marking the limits of
> the kernel doesn't restrict it being lifted later.

My thinking is that u64 limit applies to the load balancing extension, 
and the 64 engine limit applies to execbuf. Engine map itself is not 
limited. But I guess it is a theoretical/pointless discussion at this point.

> 
>>> +             return -EINVAL;
>>> +
>>> +     set.engines = kmalloc_array(set.nengine,
>>> +                                 sizeof(*set.engines),
>>> +                                 GFP_KERNEL);
>>> +     if (!set.engines)
>>> +             return -ENOMEM;
>>> +
>>> +     for (n = 0; n < set.nengine; n++) {
>>> +             u16 class, inst;
>>> +
>>> +             if (get_user(class, &user->class_instance[n].engine_class) ||
>>> +                 get_user(inst, &user->class_instance[n].engine_instance)) {
>>> +                     kfree(set.engines);
>>> +                     return -EFAULT;
>>> +             }
>>> +
>>> +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
>>> +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
>>> +                     set.engines[n] = NULL;
>>> +                     continue;
>>> +             }
>>> +
>>> +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
>>> +             if (!set.engines[n]) {
>>> +                     kfree(set.engines);
>>> +                     return -ENOENT;
>>> +             }
>>> +     }
>>> +
>>> +     err = -EFAULT;
>>> +     if (!get_user(extensions, &user->extensions))
>>> +             err = i915_user_extensions(u64_to_user_ptr(extensions),
>>> +                                        set_engines__extensions,
>>> +                                        ARRAY_SIZE(set_engines__extensions),
>>> +                                        &set);
>>> +     if (err) {
>>> +             kfree(set.engines);
>>> +             return err;
>>> +     }
>>> +
>>> +out:
>>> +     mutex_lock(&ctx->i915->drm.struct_mutex);
>>> +     kfree(ctx->engines);
>>> +     ctx->engines = set.engines;
>>> +     ctx->nengine = set.nengine;
>>> +     mutex_unlock(&ctx->i915->drm.struct_mutex);
>>> +
>>>        return 0;
>>>    }
>>>    
>>> +static int
>>> +get_engines(struct i915_gem_context *ctx,
>>> +         struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines *local;
>>> +     unsigned int n, count, size;
>>> +     int err = 0;
>>> +
>>> +restart:
>>> +     count = READ_ONCE(ctx->nengine);
>>> +     if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
>>> +             return -ENOMEM; /* unrepresentable! */
>>
>> Probably overly paranoid since we can't end up with this state set.
> 
> And I thought you wanted many engines! Paranoia around kmalloc/user
> overflows is always useful, because you know someone will send a patch
> later (and smatch doesn't really care as it only checks the limits of
> types and local constraints).

Put a comment on what it is checking then. Why INT_MAX and not U32_MAX btw?

Regards,

Tvrtko
Chris Wilson March 11, 2019, 9:45 a.m. UTC | #4
Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
> 
> On 08/03/2019 16:47, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
> >>
> >> On 08/03/2019 14:12, Chris Wilson wrote:
> >>> +static int
> >>> +set_engines(struct i915_gem_context *ctx,
> >>> +         const struct drm_i915_gem_context_param *args)
> >>> +{
> >>> +     struct i915_context_param_engines __user *user;
> >>> +     struct set_engines set = { .ctx = ctx };
> >>> +     u64 size, extensions;
> >>> +     unsigned int n;
> >>> +     int err;
> >>> +
> >>> +     user = u64_to_user_ptr(args->value);
> >>> +     size = args->size;
> >>> +     if (!size)
> >>> +             goto out;
> >>
> >> This prevents a hypothetical extension with empty map data.
> > 
> > No... This is required for resetting and I think that's covered in what
> > little docs there are. It's the set.nengine==0 test later
> > that you mean to object to. But we can't do that as that's how we
> > differentiate between modes at the moment.
> > 
> > We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
> 
> size == sizeof(struct i915_context_param_engines) could mean reset - 
> meaning no map array provided.

Nah, size=sizeof() => 0 [], size=0 => default map.
 
> Meaning one could reset the map and still pass in extensions.

I missed that you were pointing out we didn't follow the extensions on
resetting.

I'm not sure if that makes sense tbh. The extensions are written around
the concept of applying to the new engines[], and if the user has
explicitly removed the engines[] (distinct from defining a zero array),
what extensions can apply? One hopes they end up -EINVAL. As they should
-EINVAL, I guess it is no harm done to apply them.

> >>> +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> >>> +     if (size < sizeof(*user) || size % sizeof(*user->class_instance))
> >>
> >> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
> >>
> >>> +             return -EINVAL;
> >>> +
> >>> +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> >>> +     if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
> >>
> >> I would prefer we drop the size restriction since it doesn't apply to
> >> the engine map per se.
> > 
> > u64 is a limit that will be non-trivial to lift. Marking the limits of
> > the kernel doesn't restrict it being lifted later.
> 
> My thinking is that u64 limit applies to the load balancing extension, 
> and the 64 engine limit applies to execbuf. Engine map itself is not 
> limited. But I guess it is a theoretical/pointless discussion at this point.

I know what you mean, I'm just looking at that we use u64 around the
uAPI for masks, and u8/unsigned long internally. So even going beyond
BITS_PER_LONG is problematic.

I'm in two minds. Yes, the limit doesn't apply to engines[] itself, but
for practical reasons there is a limit, and until we can remove those,
lifting the restriction here is immaterial :|

> >>> +static int
> >>> +get_engines(struct i915_gem_context *ctx,
> >>> +         struct drm_i915_gem_context_param *args)
> >>> +{
> >>> +     struct i915_context_param_engines *local;
> >>> +     unsigned int n, count, size;
> >>> +     int err = 0;
> >>> +
> >>> +restart:
> >>> +     count = READ_ONCE(ctx->nengine);
> >>> +     if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
> >>> +             return -ENOMEM; /* unrepresentable! */
> >>
> >> Probably overly paranoid since we can't end up with this state set.
> > 
> > And I thought you wanted many engines! Paranoia around kmalloc/user
> > overflows is always useful, because you know someone will send a patch
> > later (and smatch doesn't really care as it only checks the limits of
> > types and local constraints).
> 
> Put a comment on what it is checking then. Why INT_MAX and not U32_MAX btw?

Vague memories about what gets checked for overflow in drm_malloc_large.
Nowadays, the in-trend is check_mul_overflow() with size_t.
-Chris
Tvrtko Ursulin March 11, 2019, 10:12 a.m. UTC | #5
On 11/03/2019 09:45, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
>>
>> On 08/03/2019 16:47, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>>>>
>>>> On 08/03/2019 14:12, Chris Wilson wrote:
>>>>> +static int
>>>>> +set_engines(struct i915_gem_context *ctx,
>>>>> +         const struct drm_i915_gem_context_param *args)
>>>>> +{
>>>>> +     struct i915_context_param_engines __user *user;
>>>>> +     struct set_engines set = { .ctx = ctx };
>>>>> +     u64 size, extensions;
>>>>> +     unsigned int n;
>>>>> +     int err;
>>>>> +
>>>>> +     user = u64_to_user_ptr(args->value);
>>>>> +     size = args->size;
>>>>> +     if (!size)
>>>>> +             goto out;
>>>>
>>>> This prevents a hypothetical extension with empty map data.
>>>
>>> No... This is required for resetting and I think that's covered in what
>>> little docs there are. It's the set.nengine==0 test later
>>> that you mean to object to. But we can't do that as that's how we
>>> differentiate between modes at the moment.
>>>
>>> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
>>
>> size == sizeof(struct i915_context_param_engines) could mean reset -
>> meaning no map array provided.
> 
> Nah, size=sizeof() => 0 [], size=0 => default map.
>   
>> Meaning one could reset the map and still pass in extensions.
> 
> I missed that you were pointing out we didn't follow the extensions on
> resetting.
> 
> I'm not sure if that makes sense tbh. The extensions are written around
> the concept of applying to the new engines[], and if the user has
> explicitly removed the engines[] (distinct from defining a zero array),
> what extensions can apply? One hopes they end up -EINVAL. As they should
> -EINVAL, I guess it is no harm done to apply them.

Yeah it is hypothetical for now. Just future proofing for if we add some 
engine map extension which makes sense after resetting the map.

>>>>> +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
>>>>> +     if (size < sizeof(*user) || size % sizeof(*user->class_instance))
>>>>
>>>> IS_ALIGNED for the second condition for consistency with the BUILD_BUG_ON?
>>>>
>>>>> +             return -EINVAL;
>>>>> +
>>>>> +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
>>>>> +     if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
>>>>
>>>> I would prefer we drop the size restriction since it doesn't apply to
>>>> the engine map per se.
>>>
>>> u64 is a limit that will be non-trivial to lift. Marking the limits of
>>> the kernel doesn't restrict it being lifted later.
>>
>> My thinking is that u64 limit applies to the load balancing extension,
>> and the 64 engine limit applies to execbuf. Engine map itself is not
>> limited. But I guess it is a theoretical/pointless discussion at this point.
> 
> I know what you mean, I'm just looking at that we use u64 around the
> uAPI for masks, and u8/unsigned long internally. So even going beyond
> BITS_PER_LONG is problematic.
> 
> I'm in two minds. Yes, the limit doesn't apply to engines[] itself, but
> for practical reasons there is a limit, and until we can remove those,
> lifting the restriction here is immaterial :|

Ok.

> 
>>>>> +static int
>>>>> +get_engines(struct i915_gem_context *ctx,
>>>>> +         struct drm_i915_gem_context_param *args)
>>>>> +{
>>>>> +     struct i915_context_param_engines *local;
>>>>> +     unsigned int n, count, size;
>>>>> +     int err = 0;
>>>>> +
>>>>> +restart:
>>>>> +     count = READ_ONCE(ctx->nengine);
>>>>> +     if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
>>>>> +             return -ENOMEM; /* unrepresentable! */
>>>>
>>>> Probably overly paranoid since we can't end up with this state set.
>>>
>>> And I thought you wanted many engines! Paranoia around kmalloc/user
> >>> overflows is always useful, because you know someone will send a patch
>>> later (and smatch doesn't really care as it only checks the limits of
>>> types and local constraints).
>>
>> Put a comment on what it is checking then. Why INT_MAX and not U32_MAX btw?
> 
> Vague memories about what gets checked for overflow in drm_malloc_large.
> Nowadays, the in-trend is check_mul_overflow() with size_t.

Oh that one.. I was thinking about args->size = size assignment (u32). 
Both are 32-bit luckily.

Regards,

Tvrtko
Chris Wilson March 11, 2019, 2:45 p.m. UTC | #6
Quoting Chris Wilson (2019-03-11 09:45:17)
> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
> > 
> > On 08/03/2019 16:47, Chris Wilson wrote:
> > > Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
> > >>
> > >> On 08/03/2019 14:12, Chris Wilson wrote:
> > >>> +static int
> > >>> +set_engines(struct i915_gem_context *ctx,
> > >>> +         const struct drm_i915_gem_context_param *args)
> > >>> +{
> > >>> +     struct i915_context_param_engines __user *user;
> > >>> +     struct set_engines set = { .ctx = ctx };
> > >>> +     u64 size, extensions;
> > >>> +     unsigned int n;
> > >>> +     int err;
> > >>> +
> > >>> +     user = u64_to_user_ptr(args->value);
> > >>> +     size = args->size;
> > >>> +     if (!size)
> > >>> +             goto out;
> > >>
> > >> This prevents a hypothetical extension with empty map data.
> > > 
> > > No... This is required for resetting and I think that's covered in what
> > > little docs there are. It's the set.nengine==0 test later
> > > that you mean to object to. But we can't do that as that's how we
> > > differentiate between modes at the moment.
> > > 
> > > We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
> > 
> > size == sizeof(struct i915_context_param_engines) could mean reset - 
> > meaning no map array provided.
> 
> Nah, size=sizeof() => 0 [], size=0 => default map.
>  
> > Meaning one could reset the map and still pass in extensions.
> 
> I missed that you were pointing out we didn't follow the extensions on
> resetting.
> 
> I'm not sure if that makes sense tbh. The extensions are written around
> the concept of applying to the new engines[], and if the user has
> explicitly removed the engines[] (distinct from defining a zero array),
> what extensions can apply? One hopes they end up -EINVAL. As they should
> -EINVAL, I guess it is no harm done to apply them.

Ok, the problem with the size=0 case is that quite obviously there is no
extension chain to follow. (That was silly of me.)

I think
	.size = 0 => reset to default
and
	.size = sizeof(arg) => 0 engines ([])
	.size = sizeof(arg) + N*sizeof(*class_instance) => N engines ([N])
make the most logical sense, which does imply that if you want to apply
extension options to ctx->engines[] you need to specify them.

And that also implies that if we have an extension that may make sense
to the default setup, then either we say creating the engine[] map is
compulsory, or we don't use a set-engines extension for that.
-Chris
Tvrtko Ursulin March 11, 2019, 4:16 p.m. UTC | #7
On 11/03/2019 14:45, Chris Wilson wrote:
> Quoting Chris Wilson (2019-03-11 09:45:17)
>> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
>>>
>>> On 08/03/2019 16:47, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>>>>>
>>>>> On 08/03/2019 14:12, Chris Wilson wrote:
>>>>>> +static int
>>>>>> +set_engines(struct i915_gem_context *ctx,
>>>>>> +         const struct drm_i915_gem_context_param *args)
>>>>>> +{
>>>>>> +     struct i915_context_param_engines __user *user;
>>>>>> +     struct set_engines set = { .ctx = ctx };
>>>>>> +     u64 size, extensions;
>>>>>> +     unsigned int n;
>>>>>> +     int err;
>>>>>> +
>>>>>> +     user = u64_to_user_ptr(args->value);
>>>>>> +     size = args->size;
>>>>>> +     if (!size)
>>>>>> +             goto out;
>>>>>
>>>>> This prevents a hypothetical extension with empty map data.
>>>>
>>>> No... This is required for resetting and I think that's covered in what
>>>> little docs there are. It's the set.nengine==0 test later
>>>> that you mean to object to. But we can't do that as that's how we
>>>> differentiate between modes at the moment.
>>>>
>>>> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
>>>
>>> size == sizeof(struct i915_context_param_engines) could mean reset -
>>> meaning no map array provided.
>>
>> Nah, size=sizeof() => 0 [], size=0 => default map.
>>   
>>> Meaning one could reset the map and still pass in extensions.
>>
>> I missed that you were pointing out we didn't follow the extensions on
>> resetting.
>>
>> I'm not sure if that makes sense tbh. The extensions are written around
>> the concept of applying to the new engines[], and if the use has
>> explicitly removed the engines[] (distinct from defining a zero array),
>> what extensions can apply? One hopes they end up -EINVAL. As they should
>> -EINVAL, I guess it is no harm done to apply them.
> 
> Ok, the problem with the size=0 case is that quite obviously there is no
> extension chain to follow. (That was silly of me.)
> 
> I think
> 	.size = 0 => reset to default
> and
> 	.size = sizeof(arg) => 0 engines ([])

What is the difference between these two?

> 	.size = sizeof(arg) + N*sizeof(*class_instance) => N engines ([N])
> make the most logical sense, which does imply that if you want to apply
> extension options to ctx->engines[] you need to specify them.
> 
> And that also implies that if we have an extension that may make sense
> to the default setup, then either we say creating the engine[] map is
> compulsory, or we don't use a set-engines extension for that.

Hm.. load_balance is extension of set_engines. If we wanted to go crazy 
we could also support it directly from set_param(param=LOAD_BALANCE)?

I am not saying it makes sense, just thinking out loud.

Regards,

Tvrtko
Chris Wilson March 11, 2019, 4:22 p.m. UTC | #8
Quoting Tvrtko Ursulin (2019-03-11 16:16:27)
> 
> On 11/03/2019 14:45, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-03-11 09:45:17)
> >> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
> >>>
> >>> On 08/03/2019 16:47, Chris Wilson wrote:
> >>>> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
> >>>>>
> >>>>> On 08/03/2019 14:12, Chris Wilson wrote:
> >>>>>> +static int
> >>>>>> +set_engines(struct i915_gem_context *ctx,
> >>>>>> +         const struct drm_i915_gem_context_param *args)
> >>>>>> +{
> >>>>>> +     struct i915_context_param_engines __user *user;
> >>>>>> +     struct set_engines set = { .ctx = ctx };
> >>>>>> +     u64 size, extensions;
> >>>>>> +     unsigned int n;
> >>>>>> +     int err;
> >>>>>> +
> >>>>>> +     user = u64_to_user_ptr(args->value);
> >>>>>> +     size = args->size;
> >>>>>> +     if (!size)
> >>>>>> +             goto out;
> >>>>>
> >>>>> This prevents a hypothetical extension with empty map data.
> >>>>
> >>>> No... This is required for resetting and I think that's covered in what
> >>>> little docs there are. It's the set.nengine==0 test later
> >>>> that you mean to object to. But we can't do that as that's how we
> >>>> differentiate between modes at the moment.
> >>>>
> >>>> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
> >>>
> >>> size == sizeof(struct i915_context_param_engines) could mean reset -
> >>> meaning no map array provided.
> >>
> >> Nah, size=sizeof() => 0 [], size=0 => default map.
> >>   
> >>> Meaning one could reset the map and still pass in extensions.
> >>
> >> I missed that you were pointing out we didn't follow the extensions on
> >> resetting.
> >>
> >> I'm not sure if that makes sense tbh. The extensions are written around
> >> the concept of applying to the new engines[], and if the use has
> >> explicitly removed the engines[] (distinct from defining a zero array),
> >> what extensions can apply? One hopes they end up -EINVAL. As they should
> >> -EINVAL, I guess it is no harm done to apply them.
> > 
> > Ok, the problem with the size=0 case is that quite obviously there is no
> > extension chain to follow. (That was silly of me.)
> > 
> > I think
> >       .size = 0 => reset to default
> > and
> >       .size = sizeof(arg) => 0 engines ([])
> 
> What is the difference between these two?

One uses the legacy ring mask, and the other is a context with no
engines.

> >       .size = sizeof(arg) + N*sizeof(*class_instance) => N engines ([N])
> > make the most logical sense, which does imply that if you want to apply
> > extension options to ctx->engines[] you need to specify them.
> > 
> > And that also implies that if we have an extension that may make sense
> > to the default setup, then either we say creating the engine[] map is
> > compulsory, or we don't use a set-engines extension for that.
> 
> Hm.. load_balance is extension of set_engines. If we wanted to go crazy 
> we could also support it directly from set_param(param=LOAD_BALANCE)?

My thinking was that it only made sense as a constructor property. (Back
before we hooked set-parameter for constructor properties). I still like
how set-engines + extensions has turned out. (But haven't coded up the
alternatives, so who knows.)
-Chris
Tvrtko Ursulin March 11, 2019, 4:34 p.m. UTC | #9
On 11/03/2019 16:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-11 16:16:27)
>>
>> On 11/03/2019 14:45, Chris Wilson wrote:
>>> Quoting Chris Wilson (2019-03-11 09:45:17)
>>>> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
>>>>>
>>>>> On 08/03/2019 16:47, Chris Wilson wrote:
>>>>>> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
>>>>>>>
>>>>>>> On 08/03/2019 14:12, Chris Wilson wrote:
>>>>>>>> +static int
>>>>>>>> +set_engines(struct i915_gem_context *ctx,
>>>>>>>> +         const struct drm_i915_gem_context_param *args)
>>>>>>>> +{
>>>>>>>> +     struct i915_context_param_engines __user *user;
>>>>>>>> +     struct set_engines set = { .ctx = ctx };
>>>>>>>> +     u64 size, extensions;
>>>>>>>> +     unsigned int n;
>>>>>>>> +     int err;
>>>>>>>> +
>>>>>>>> +     user = u64_to_user_ptr(args->value);
>>>>>>>> +     size = args->size;
>>>>>>>> +     if (!size)
>>>>>>>> +             goto out;
>>>>>>>
>>>>>>> This prevents a hypothetical extension with empty map data.
>>>>>>
>>>>>> No... This is required for resetting and I think that's covered in what
>>>>>> little docs there are. It's the set.nengine==0 test later
>>>>>> that you mean to object to. But we can't do that as that's how we
>>>>>> differentiate between modes at the moment.
>>>>>>
>>>>>> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
>>>>>
>>>>> size == sizeof(struct i915_context_param_engines) could mean reset -
>>>>> meaning no map array provided.
>>>>
>>>> Nah, size=sizeof() => 0 [], size=0 => default map.
>>>>    
>>>>> Meaning one could reset the map and still pass in extensions.
>>>>
>>>> I missed that you were pointing out we didn't follow the extensions on
>>>> resetting.
>>>>
>>>> I'm not sure if that makes sense tbh. The extensions are written around
>>>> the concept of applying to the new engines[], and if the user has
>>>> explicitly removed the engines[] (distinct from defining a zero array),
>>>> what extensions can apply? One hopes they end up -EINVAL. As they should
>>>> -EINVAL, I guess it is no harm done to apply them.
>>>
>>> Ok, the problem with the size=0 case is that quite obviously there is no
>>> extension chain to follow. (That was silly of me.)
>>>
>>> I think
>>>        .size = 0 => reset to default
>>> and
>>>        .size = sizeof(arg) => 0 engines ([])
>>
>> What is the difference between these two?
> 
> One uses the legacy ring mask, and the other is a context with no
> engines.

The latter is hypothetical, no? Current patches don't have this ability 
AFAIR. Why would we want this?

>>>        .size = sizeof(arg) + N*sizeof(*class_instance) => N engines ([N])
>>> make the most logical sense, which does imply that if you want to apply
>>> extension options to ctx->engines[] you need to specify them.
>>>
>>> And that also implies that if we have an extension that may make sense
>>> to the default setup, then either we say creating the engine[] map is
>>> compulsory, or we don't use a set-engines extension for that.
>>
>> Hm.. load_balance is extension of set_engines. If we wanted to go crazy
>> we could also support it directly from set_param(param=LOAD_BALANCE)?
> 
> My thinking was that it only made sense as a constructor property. (Back
> before we hooked set-parameter for constructor properties). I still like
> how set-engines + extensions has turned out. (But haven't coded up the
> alternatives, so who knows.)

Everyone is fine with it so let's not change it much. :)

Regards,

Tvrtko
Chris Wilson March 11, 2019, 4:52 p.m. UTC | #10
Quoting Tvrtko Ursulin (2019-03-11 16:34:41)
> 
> On 11/03/2019 16:22, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-11 16:16:27)
> >>
> >> On 11/03/2019 14:45, Chris Wilson wrote:
> >>> Quoting Chris Wilson (2019-03-11 09:45:17)
> >>>> Quoting Tvrtko Ursulin (2019-03-11 09:23:44)
> >>>>>
> >>>>> On 08/03/2019 16:47, Chris Wilson wrote:
> >>>>>> Quoting Tvrtko Ursulin (2019-03-08 16:27:22)
> >>>>>>>
> >>>>>>> On 08/03/2019 14:12, Chris Wilson wrote:
> >>>>>>>> +static int
> >>>>>>>> +set_engines(struct i915_gem_context *ctx,
> >>>>>>>> +         const struct drm_i915_gem_context_param *args)
> >>>>>>>> +{
> >>>>>>>> +     struct i915_context_param_engines __user *user;
> >>>>>>>> +     struct set_engines set = { .ctx = ctx };
> >>>>>>>> +     u64 size, extensions;
> >>>>>>>> +     unsigned int n;
> >>>>>>>> +     int err;
> >>>>>>>> +
> >>>>>>>> +     user = u64_to_user_ptr(args->value);
> >>>>>>>> +     size = args->size;
> >>>>>>>> +     if (!size)
> >>>>>>>> +             goto out;
> >>>>>>>
> >>>>>>> This prevents a hypothetical extension with empty map data.
> >>>>>>
> >>>>>> No... This is required for resetting and I think that's covered in what
> >>>>>> little docs there are. It's the set.nengine==0 test later
> >>>>>> that you mean to object to. But we can't do that as that's how we
> >>>>>> differentiate between modes at the moment.
> >>>>>>
> >>>>>> We could use ctx->nengine = 0 and ctx->engines = ZERO_PTR.
> >>>>>
> >>>>> size == sizeof(struct i915_context_param_engines) could mean reset -
> >>>>> meaning no map array provided.
> >>>>
> >>>> Nah, size=sizeof() => 0 [], size=0 => default map.
> >>>>    
> >>>>> Meaning one could reset the map and still pass in extensions.
> >>>>
> >>>> I missed that you were pointing out we didn't follow the extensions on
> >>>> resetting.
> >>>>
> >>>> I'm not sure if that makes sense tbh. The extensions are written around
> >>>> the concept of applying to the new engines[], and if the user has
> >>>> explicitly removed the engines[] (distinct from defining a zero array),
> >>>> what extensions can apply? One hopes they end up -EINVAL. As they should
> >>>> -EINVAL, I guess it is no harm done to apply them.
> >>>
> >>> Ok, the problem with the size=0 case is that quite obviously there is no
> >>> extension chain to follow. (That was silly of me.)
> >>>
> >>> I think
> >>>        .size = 0 => reset to default
> >>> and
> >>>        .size = sizeof(arg) => 0 engines ([])
> >>
> >> What is the difference between these two?
> > 
> > One uses the legacy ring mask, and the other is a context with no
> > engines.
> 
> The latter is hypothetical, no? Current patches don't have this ability 
> AFAIR. Why would we want this?

They did the moment you asked for it. Do you not recall? :-p
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2cfc68b66944..86d9bea6f275 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -101,6 +101,21 @@  static struct i915_global_gem_context {
 	struct kmem_cache *slab_luts;
 } global;
 
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+		   unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+	if (flags & LOOKUP_USER_INDEX) {
+		if (instance >= ctx->nengine)
+			return NULL;
+
+		return ctx->engines[instance];
+	}
+
+	return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
 struct i915_lut_handle *i915_lut_handle_alloc(void)
 {
 	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -234,6 +249,8 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
+	kfree(ctx->engines);
+
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		it->ops->destroy(it);
 
@@ -1311,9 +1328,9 @@  static int set_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1331,9 +1348,154 @@  static int set_sseu(struct i915_gem_context *ctx,
 
 	args->size = sizeof(user_sseu);
 
+	return 0;
+};
+
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = { .ctx = ctx };
+	u64 size, extensions;
+	unsigned int n;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+	if (size < sizeof(*user) || size % sizeof(*user->class_instance))
+		return -EINVAL;
+
+	set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
+	if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK + 1)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	for (n = 0; n < set.nengine; n++) {
+		u16 class, inst;
+
+		if (get_user(class, &user->class_instance[n].engine_class) ||
+		    get_user(inst, &user->class_instance[n].engine_instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		if (class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines[n] = NULL;
+			continue;
+		}
+
+		set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
+		if (!set.engines[n]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	mutex_lock(&ctx->i915->drm.struct_mutex);
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine;
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
 	return 0;
 }
 
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines *local;
+	unsigned int n, count, size;
+	int err = 0;
+
+restart:
+	count = READ_ONCE(ctx->nengine);
+	if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
+		return -ENOMEM; /* unrepresentable! */
+
+	size = sizeof(*local) + count * sizeof(*local->class_instance);
+	if (!args->size) {
+		args->size = size;
+		return 0;
+	}
+	if (args->size < size)
+		return -EINVAL;
+
+	local = kmalloc(size, GFP_KERNEL);
+	if (!local)
+		return -ENOMEM;
+
+	if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
+		err = -EINTR;
+		goto out;
+	}
+
+	if (READ_ONCE(ctx->nengine) != count) {
+		mutex_unlock(&ctx->i915->drm.struct_mutex);
+		kfree(local);
+		goto restart;
+	}
+
+	local->extensions = 0;
+	for (n = 0; n < count; n++) {
+		if (ctx->engines[n]) {
+			local->class_instance[n].engine_class =
+				ctx->engines[n]->uabi_class;
+			local->class_instance[n].engine_instance =
+				ctx->engines[n]->instance;
+		} else {
+			local->class_instance[n].engine_class =
+				I915_ENGINE_CLASS_INVALID;
+			local->class_instance[n].engine_instance =
+				I915_ENGINE_CLASS_INVALID_NONE;
+		}
+	}
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	args->size = size;
+
+out:
+	kfree(local);
+	return err;
+}
+
 static int ctx_setparam(struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
 {
@@ -1406,6 +1568,10 @@  static int ctx_setparam(struct i915_gem_context *ctx,
 		ret = set_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1459,6 +1625,22 @@  static int clone_sseu(struct i915_gem_context *dst,
 	return 0;
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct intel_engine_cs **engines;
+
+	engines = kmemdup(src->engines,
+			  sizeof(*src->engines) * src->nengine,
+			  GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	dst->engines = engines;
+	dst->nengine = src->nengine;
+	return 0;
+}
+
 static int create_clone(struct i915_user_extension __user *ext, void *data)
 {
 	struct drm_i915_gem_context_create_ext_clone local;
@@ -1514,6 +1696,12 @@  static int create_clone(struct i915_user_extension __user *ext, void *data)
 		i915_ppgtt_open(dst->ppgtt);
 	}
 
+	if (local.flags & I915_CONTEXT_CLONE_ENGINES && src->nengine) {
+		err = clone_engines(dst, src);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1632,9 +1820,9 @@  static int get_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1715,6 +1903,10 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index f8f6e6c960a7..8a89f3053f73 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@  struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -110,6 +112,8 @@  struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee6d301a9627..67e4a0c2ebff 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2090,13 +2090,23 @@  static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine) {
+			DRM_DEBUG("execbuf with unknown ring: %u\n",
+				  user_ring_id);
+			return NULL;
+		}
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2109,11 +2119,11 @@  eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2124,9 +2134,9 @@  eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2336,7 +2346,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 50d154954d5f..00147b990e63 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -124,6 +124,8 @@  enum drm_i915_gem_engine_class {
 	I915_ENGINE_CLASS_INVALID	= -1
 };
 
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
  *
@@ -1509,6 +1511,26 @@  struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selects engine[0],
+ * and so on upwards. Slots 0...N-1 are filled in using the specified
+ * (class, instance) pairs. Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a
+ * zero-sized argument, will revert to the default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1573,6 +1595,23 @@  struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+
+	struct {
+		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
+		__u16 engine_instance;
+	} class_instance[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct { \
+		__u16 engine_class; \
+		__u16 engine_instance; \
+	} class_instance[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
@@ -1589,7 +1628,8 @@  struct drm_i915_gem_context_create_ext_clone {
 #define I915_CONTEXT_CLONE_SSEU		(1u << 2)
 #define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
 #define I915_CONTEXT_CLONE_VM		(1u << 4)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
 	__u64 rsvd;
 };