diff mbox series

[37/40] drm/i915: Allow a context to define its set of engines

Message ID 20180919195544.1511-37-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/40] drm: Use default dma_fence hooks where possible for null syncobj | expand

Commit Message

Chris Wilson Sept. 19, 2018, 7:55 p.m. UTC
Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. It's use will be revealed in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
 include/uapi/drm/i915_drm.h                | 27 +++++++
 4 files changed, 135 insertions(+), 6 deletions(-)

Comments

Tvrtko Ursulin Sept. 27, 2018, 11:28 a.m. UTC | #1
On 19/09/2018 20:55, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. It's use will be revealed in the next patch.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
>   include/uapi/drm/i915_drm.h                | 27 +++++++
>   4 files changed, 135 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a8570a07b3b7..313471253f51 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,6 +90,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "i915_user_extensions.h"
>   #include "intel_workarounds.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> +	kfree(ctx->engines);
> +
>   	if (ctx->timeline)
>   		i915_timeline_put(ctx->timeline);
>   
> @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};

This is OK unless someone one day gets the desire to make the extension 
namespace sparse. I was thinking on how to put some warnings in the code 
to warn about it, but I think that's for later. Namespace is also per 
parent ioctl, another thing which would perhaps need enforcing.

> +
> +static int set_engines(struct i915_gem_context *ctx,
> +		       struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct set_engines set = {
> +		.ctx = ctx,
> +		.engines = NULL,
> +		.nengine = -1,
> +	};
> +	unsigned int n;
> +	u64 size, extensions;

Size is u32 in the uAPI, so either that or unsigned int would do here.

> +	int err;
> +
> +	user = u64_to_user_ptr(args->value);
> +	size = args->size;
> +	if (!size)

args->sizes = sizeof(*user);

... if you want to allow size probing via set param, or we add a 
get_param (too much work for nothing?) and only allow probing from there?

> +		goto out;
> +
> +	if (size < sizeof(struct i915_context_param_engines))
> +		return -EINVAL;
> +
> +	size -= sizeof(struct i915_context_param_engines);
> +	if (size % sizeof(*user->class_instance))
> +		return -EINVAL;
> +
> +	set.nengine = size / sizeof(*user->class_instance);
> +	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
> +		return -EINVAL;
> +
> +	set.engines = kmalloc_array(set.nengine + 1,
> +				    sizeof(*set.engines),
> +				    GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	set.engines[0] = NULL;

/* Reserve the I915_EXEC_DEFAULT slot. */

> +	for (n = 0; n < set.nengine; n++) {
> +		u32 class, instance;

I will later recommend we use u16 for class/instance. I think we settled 
for that in the meantime.

> +
> +		if (get_user(class, &user->class_instance[n].class) ||
> +		    get_user(instance, &user->class_instance[n].instance)) {
> +			kfree(set.engines);
> +			return -EFAULT;
> +		}
> +
> +		set.engines[n + 1] =
> +			intel_engine_lookup_user(ctx->i915, class, instance);
> +		if (!set.engines[n + 1]) {
> +			kfree(set.engines);
> +			return -ENOENT;
> +		}
> +	}
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {

Do we need an user extensions destructor table for a more 
compartmentalized design? This actually applies to the 
i915_user_extensions() implementation.. it would need to take two 
function tables I think and unwind until the one that failed.

> +		kfree(set.engines);
> +		return err;
> +	}
> +
> +out:
> +	kfree(ctx->engines);
> +	ctx->engines = set.engines;
> +	ctx->nengine = set.nengine + 1;
> +
> +	return 0;
> +}
> +
>   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file)
>   {
> @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		}
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 770043449ba6..9f89119a6566 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -67,6 +67,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct intel_engine_cs **engines;
> +
>   	struct i915_timeline *timeline;
>   
>   	/**
> @@ -135,6 +137,8 @@ struct i915_gem_context {
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   
> +	unsigned int nengine;

Why put related fields apart?

> +
>   	/**
>   	 * @hw_id: - unique identifier for the context
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 109486a6ef07..faedfdca875d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>   };
>   
>   static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
>   		 struct drm_file *file,
>   		 struct drm_i915_gem_execbuffer2 *args)
>   {
>   	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>   	struct intel_engine_cs *engine;
>   
> +	if (eb->ctx->engines) {
> +		if (user_ring_id >= eb->ctx->nengine) {
> +			DRM_DEBUG("execbuf with unknown ring: %u\n",
> +				  user_ring_id);
> +			return NULL;
> +		}
> +
> +		return eb->ctx->engines[user_ring_id];
> +	}
> +
>   	if (user_ring_id > I915_USER_RINGS) {
>   		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>   		return NULL;
> @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   		return NULL;
>   	}
>   
> -	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
>   		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>   
>   		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> -			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> +			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>   		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>   			   bsd_idx <= I915_EXEC_BSD_RING2) {
>   			bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   			return NULL;
>   		}
>   
> -		engine = dev_priv->engine[_VCS(bsd_idx)];
> +		engine = eb->i915->engine[_VCS(bsd_idx)];
>   	} else {
> -		engine = dev_priv->engine[user_ring_map[user_ring_id]];
> +		engine = eb->i915->engine[user_ring_map[user_ring_id]];
>   	}
>   
>   	if (!engine) {
> @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_destroy;
>   
> -	eb.engine = eb_select_engine(eb.i915, file, args);
> +	eb.engine = eb_select_engine(&eb, file, args);
>   	if (!eb.engine) {
>   		err = -EINVAL;
>   		goto err_engine;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 65b0b84419f3..d41b4c673af4 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. The array created is offset by 1, such that by default
> + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
> + * are then filled in using the specified (class, instance).

Should we allow specifying the default index? I am wondering if that 
would make life easier to any userspace.

> + *
> + * Setting the number of engines bound to the context will revert back to
> + * default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0x7
> +
>   	__u64 value;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions;
> +
> +	struct {
> +		__u32 class; /* see enum drm_i915_gem_engine_class */
> +		__u32 instance;

u16 here as mentioned earlier.

> +	} class_instance[0];
> +};
> +
>   enum drm_i915_oa_format {
>   	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
>   	I915_OA_FORMAT_A29,	    /* HSW only */
> 

Regards,

Tvrtko
Chris Wilson Sept. 28, 2018, 8:22 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-09-27 12:28:47)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context to define the set of engines that it can access, order fully
> > controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> > 
> > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> > execbuf. It's use will be revealed in the next patch.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
> >   include/uapi/drm/i915_drm.h                | 27 +++++++
> >   4 files changed, 135 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index a8570a07b3b7..313471253f51 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -90,6 +90,7 @@
> >   #include <drm/i915_drm.h>
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> > +#include "i915_user_extensions.h"
> >   #include "intel_workarounds.h"
> >   
> >   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> > @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >                       ce->ops->destroy(ce);
> >       }
> >   
> > +     kfree(ctx->engines);
> > +
> >       if (ctx->timeline)
> >               i915_timeline_put(ctx->timeline);
> >   
> > @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >       return ret;
> >   }
> >   
> > +struct set_engines {
> > +     struct i915_gem_context *ctx;
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> 
> This is OK unless someone one day gets the desire to make the extension 
> namespace sparse. I was thinking on how to put some warnings in the code 
> to warn about it, but I think that's for later. Namespace is also per 
> parent ioctl, another thing which would perhaps need enforcing.

Unless you intend to go extremely sparse, I'd just leave it with the
usual [NAME1] = func1, and skipping over NULLs. We can always add
alternatives if need be (I'd actually been meaning to convert execbuf3
over to this scheme to flesh it out a bit more).

> > +static int set_engines(struct i915_gem_context *ctx,
> > +                    struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct set_engines set = {
> > +             .ctx = ctx,
> > +             .engines = NULL,
> > +             .nengine = -1,
> > +     };
> > +     unsigned int n;
> > +     u64 size, extensions;
> 
> Size is u32 in the uAPI, so either that or unsigned int would do here.
> 
> > +     int err;
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     size = args->size;
> > +     if (!size)
> 
> args->sizes = sizeof(*user);
> 
> ... if you want to allow size probing via set param, or we add a 
> get_param (too much work for nothing?) and only allow probing from there?

It's a variable array, so a little trickier.

> > +             goto out;
> > +
> > +     if (size < sizeof(struct i915_context_param_engines))
> > +             return -EINVAL;
> > +
> > +     size -= sizeof(struct i915_context_param_engines);
> > +     if (size % sizeof(*user->class_instance))
> > +             return -EINVAL;
> > +
> > +     set.nengine = size / sizeof(*user->class_instance);
> > +     if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
> > +             return -EINVAL;
> > +
> > +     set.engines = kmalloc_array(set.nengine + 1,
> > +                                 sizeof(*set.engines),
> > +                                 GFP_KERNEL);
> > +     if (!set.engines)
> > +             return -ENOMEM;
> > +
> > +     set.engines[0] = NULL;
> 
> /* Reserve the I915_EXEC_DEFAULT slot. */
> 
> > +     for (n = 0; n < set.nengine; n++) {
> > +             u32 class, instance;
> 
> I will later recommend we use u16 for class/instance. I think we settled 
> for that in the meantime.
> 
> > +
> > +             if (get_user(class, &user->class_instance[n].class) ||
> > +                 get_user(instance, &user->class_instance[n].instance)) {
> > +                     kfree(set.engines);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             set.engines[n + 1] =
> > +                     intel_engine_lookup_user(ctx->i915, class, instance);
> > +             if (!set.engines[n + 1]) {
> > +                     kfree(set.engines);
> > +                     return -ENOENT;
> > +             }
> > +     }
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> 
> Do we need an user extensions destructor table for a more 
> compartmentalized design? This actually applies to the 
> i915_user_extensions() implementation.. it would need to take two 
> function tables I think and unwind until the one that failed.

Yeah, dtor for unwind is intriguing.

> > +             kfree(set.engines);
> > +             return err;
> > +     }
> > +
> > +out:
> > +     kfree(ctx->engines);
> > +     ctx->engines = set.engines;
> > +     ctx->nengine = set.nengine + 1;
> > +
> > +     return 0;
> > +}
> > +
> >   int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                                   struct drm_file *file)
> >   {
> > @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >               }
> >               break;
> >   
> > +     case I915_CONTEXT_PARAM_ENGINES:
> > +             ret = set_engines(ctx, args);
> > +             break;
> > +
> >       default:
> >               ret = -EINVAL;
> >               break;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 770043449ba6..9f89119a6566 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -67,6 +67,8 @@ struct i915_gem_context {
> >       /** file_priv: owning file descriptor */
> >       struct drm_i915_file_private *file_priv;
> >   
> > +     struct intel_engine_cs **engines;
> > +
> >       struct i915_timeline *timeline;
> >   
> >       /**
> > @@ -135,6 +137,8 @@ struct i915_gem_context {
> >   #define CONTEXT_CLOSED                      1
> >   #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> >   
> > +     unsigned int nengine;
> 
> Why put related fields apart?

Packing.

> 
> > +
> >       /**
> >        * @hw_id: - unique identifier for the context
> >        *
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 109486a6ef07..faedfdca875d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> >   };
> >   
> >   static struct intel_engine_cs *
> > -eb_select_engine(struct drm_i915_private *dev_priv,
> > +eb_select_engine(struct i915_execbuffer *eb,
> >                struct drm_file *file,
> >                struct drm_i915_gem_execbuffer2 *args)
> >   {
> >       unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> >       struct intel_engine_cs *engine;
> >   
> > +     if (eb->ctx->engines) {
> > +             if (user_ring_id >= eb->ctx->nengine) {
> > +                     DRM_DEBUG("execbuf with unknown ring: %u\n",
> > +                               user_ring_id);
> > +                     return NULL;
> > +             }
> > +
> > +             return eb->ctx->engines[user_ring_id];
> > +     }
> > +
> >       if (user_ring_id > I915_USER_RINGS) {
> >               DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> >               return NULL;
> > @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> >               return NULL;
> >       }
> >   
> > -     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
> > +     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
> >               unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
> >   
> >               if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> > -                     bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> > +                     bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
> >               } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
> >                          bsd_idx <= I915_EXEC_BSD_RING2) {
> >                       bsd_idx >>= I915_EXEC_BSD_SHIFT;
> > @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> >                       return NULL;
> >               }
> >   
> > -             engine = dev_priv->engine[_VCS(bsd_idx)];
> > +             engine = eb->i915->engine[_VCS(bsd_idx)];
> >       } else {
> > -             engine = dev_priv->engine[user_ring_map[user_ring_id]];
> > +             engine = eb->i915->engine[user_ring_map[user_ring_id]];
> >       }
> >   
> >       if (!engine) {
> > @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >       if (unlikely(err))
> >               goto err_destroy;
> >   
> > -     eb.engine = eb_select_engine(eb.i915, file, args);
> > +     eb.engine = eb_select_engine(&eb, file, args);
> >       if (!eb.engine) {
> >               err = -EINVAL;
> >               goto err_engine;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 65b0b84419f3..d41b4c673af4 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
> >   #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
> >   #define   I915_CONTEXT_DEFAULT_PRIORITY             0
> >   #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_ENGINES:
> > + *
> > + * Bind this context to operate on this subset of available engines. Henceforth,
> > + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> > + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> > + * and upwards. The array created is offset by 1, such that by default
> > + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
> > + * are then filled in using the specified (class, instance).
> 
> Should we allow specifying the default index? I am wondering if that 
> would make life easier to any userspace.

Could do, certainly gets rid of the clumsy nengines+1, but the skip for
unknown class may make up for the clumsyness :)
-Chris
Tvrtko Ursulin Oct. 1, 2018, 8:30 a.m. UTC | #3
On 28/09/2018 21:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-27 12:28:47)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> Over the last few years, we have debated how to extend the user API to
>>> support an increase in the number of engines, that may be sparse and
>>> even be heterogeneous within a class (not all video decoders created
>>> equal). We settled on using (class, instance) tuples to identify a
>>> specific engine, with an API for the user to construct a map of engines
>>> to capabilities. Into this picture, we then add a challenge of virtual
>>> engines; one user engine that maps behind the scenes to any number of
>>> physical engines. To keep it general, we want the user to have full
>>> control over that mapping. To that end, we allow the user to constrain a
>>> context to define the set of engines that it can access, order fully
>>> controlled by the user via (class, instance). With such precise control
>>> in context setup, we can continue to use the existing execbuf uABI of
>>> specifying a single index; only now it doesn't automagically map onto
>>> the engines, it uses the user defined engine map from the context.
>>>
>>> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
>>> execbuf. It's use will be revealed in the next patch.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
>>>    drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
>>>    drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
>>>    include/uapi/drm/i915_drm.h                | 27 +++++++
>>>    4 files changed, 135 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index a8570a07b3b7..313471253f51 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -90,6 +90,7 @@
>>>    #include <drm/i915_drm.h>
>>>    #include "i915_drv.h"
>>>    #include "i915_trace.h"
>>> +#include "i915_user_extensions.h"
>>>    #include "intel_workarounds.h"
>>>    
>>>    #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>>> @@ -223,6 +224,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>>>                        ce->ops->destroy(ce);
>>>        }
>>>    
>>> +     kfree(ctx->engines);
>>> +
>>>        if (ctx->timeline)
>>>                i915_timeline_put(ctx->timeline);
>>>    
>>> @@ -948,6 +951,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>        return ret;
>>>    }
>>>    
>>> +struct set_engines {
>>> +     struct i915_gem_context *ctx;
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +};
>>> +
>>> +static const i915_user_extension_fn set_engines__extensions[] = {
>>> +};
>>
>> This is OK unless someone one day gets the desire to make the extension
>> namespace sparse. I was thinking on how to put some warnings in the code
>> to warn about it, but I think that's for later. Namespace is also per
>> parent ioctl, another thing which would perhaps need enforcing.
> 
> Unless you intend to go extremely sparse, I'd just leave it with the
> usual [NAME1] = func1, and skipping over NULLs. We can always add
> alternatives if need be (I'd actually been meaning to convert execbuf3
> over to this scheme to flesh it out a bit more).

It is fine for now, I was just thinking out loud how to protect us 
against the table accidentally growing huge one day.

>>> +static int set_engines(struct i915_gem_context *ctx,
>>> +                    struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines __user *user;
>>> +     struct set_engines set = {
>>> +             .ctx = ctx,
>>> +             .engines = NULL,
>>> +             .nengine = -1,
>>> +     };
>>> +     unsigned int n;
>>> +     u64 size, extensions;
>>
>> Size is u32 in the uAPI, so either that or unsigned int would do here.
>>
>>> +     int err;
>>> +
>>> +     user = u64_to_user_ptr(args->value);
>>> +     size = args->size;
>>> +     if (!size)
>>
>> args->sizes = sizeof(*user);
>>
>> ... if you want to allow size probing via set param, or we add a
>> get_param (too much work for nothing?) and only allow probing from there?
> 
> It's a variable array, so a little trickier.

Indeed!

> 
>>> +             goto out;
>>> +
>>> +     if (size < sizeof(struct i915_context_param_engines))
>>> +             return -EINVAL;
>>> +
>>> +     size -= sizeof(struct i915_context_param_engines);
>>> +     if (size % sizeof(*user->class_instance))
>>> +             return -EINVAL;
>>> +
>>> +     set.nengine = size / sizeof(*user->class_instance);
>>> +     if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
>>> +             return -EINVAL;
>>> +
>>> +     set.engines = kmalloc_array(set.nengine + 1,
>>> +                                 sizeof(*set.engines),
>>> +                                 GFP_KERNEL);
>>> +     if (!set.engines)
>>> +             return -ENOMEM;
>>> +
>>> +     set.engines[0] = NULL;
>>
>> /* Reserve the I915_EXEC_DEFAULT slot. */
>>
>>> +     for (n = 0; n < set.nengine; n++) {
>>> +             u32 class, instance;
>>
>> I will later recommend we use u16 for class/instance. I think we settled
>> for that in the meantime.
>>
>>> +
>>> +             if (get_user(class, &user->class_instance[n].class) ||
>>> +                 get_user(instance, &user->class_instance[n].instance)) {
>>> +                     kfree(set.engines);
>>> +                     return -EFAULT;
>>> +             }
>>> +
>>> +             set.engines[n + 1] =
>>> +                     intel_engine_lookup_user(ctx->i915, class, instance);
>>> +             if (!set.engines[n + 1]) {
>>> +                     kfree(set.engines);
>>> +                     return -ENOENT;
>>> +             }
>>> +     }
>>> +
>>> +     err = -EFAULT;
>>> +     if (!get_user(extensions, &user->extensions))
>>> +             err = i915_user_extensions(u64_to_user_ptr(extensions),
>>> +                                        set_engines__extensions,
>>> +                                        ARRAY_SIZE(set_engines__extensions),
>>> +                                        &set);
>>> +     if (err) {
>>
>> Do we need an user extensions destructor table for a more
>> compartmentalized design? This actually applies to the
>> i915_user_extensions() implementation.. it would need to take two
>> function tables I think and unwind until the one that failed.
> 
> Yeah, dtor for unwind is intriguing.
> 
>>> +             kfree(set.engines);
>>> +             return err;
>>> +     }
>>> +
>>> +out:
>>> +     kfree(ctx->engines);
>>> +     ctx->engines = set.engines;
>>> +     ctx->nengine = set.nengine + 1;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>>    int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                                    struct drm_file *file)
>>>    {
>>> @@ -1011,6 +1095,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                }
>>>                break;
>>>    
>>> +     case I915_CONTEXT_PARAM_ENGINES:
>>> +             ret = set_engines(ctx, args);
>>> +             break;
>>> +
>>>        default:
>>>                ret = -EINVAL;
>>>                break;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 770043449ba6..9f89119a6566 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -67,6 +67,8 @@ struct i915_gem_context {
>>>        /** file_priv: owning file descriptor */
>>>        struct drm_i915_file_private *file_priv;
>>>    
>>> +     struct intel_engine_cs **engines;
>>> +
>>>        struct i915_timeline *timeline;
>>>    
>>>        /**
>>> @@ -135,6 +137,8 @@ struct i915_gem_context {
>>>    #define CONTEXT_CLOSED                      1
>>>    #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
>>>    
>>> +     unsigned int nengine;
>>
>> Why put related fields apart?
> 
> Packing.

Hm it feels to far. It's one per execbuf lookup so not that hot. Move 
the engines and nengines down to after struct head_rcu? And add comments 
against the members of course! :)

> 
>>
>>> +
>>>        /**
>>>         * @hw_id: - unique identifier for the context
>>>         *
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index 109486a6ef07..faedfdca875d 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -2014,13 +2014,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>>>    };
>>>    
>>>    static struct intel_engine_cs *
>>> -eb_select_engine(struct drm_i915_private *dev_priv,
>>> +eb_select_engine(struct i915_execbuffer *eb,
>>>                 struct drm_file *file,
>>>                 struct drm_i915_gem_execbuffer2 *args)
>>>    {
>>>        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>>>        struct intel_engine_cs *engine;
>>>    
>>> +     if (eb->ctx->engines) {
>>> +             if (user_ring_id >= eb->ctx->nengine) {
>>> +                     DRM_DEBUG("execbuf with unknown ring: %u\n",
>>> +                               user_ring_id);
>>> +                     return NULL;
>>> +             }
>>> +
>>> +             return eb->ctx->engines[user_ring_id];
>>> +     }
>>> +
>>>        if (user_ring_id > I915_USER_RINGS) {
>>>                DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>>>                return NULL;
>>> @@ -2033,11 +2043,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>>>                return NULL;
>>>        }
>>>    
>>> -     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
>>> +     if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
>>>                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>>>    
>>>                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
>>> -                     bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
>>> +                     bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>>>                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>>>                           bsd_idx <= I915_EXEC_BSD_RING2) {
>>>                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
>>> @@ -2048,9 +2058,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>>>                        return NULL;
>>>                }
>>>    
>>> -             engine = dev_priv->engine[_VCS(bsd_idx)];
>>> +             engine = eb->i915->engine[_VCS(bsd_idx)];
>>>        } else {
>>> -             engine = dev_priv->engine[user_ring_map[user_ring_id]];
>>> +             engine = eb->i915->engine[user_ring_map[user_ring_id]];
>>>        }
>>>    
>>>        if (!engine) {
>>> @@ -2260,7 +2270,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>>        if (unlikely(err))
>>>                goto err_destroy;
>>>    
>>> -     eb.engine = eb_select_engine(eb.i915, file, args);
>>> +     eb.engine = eb_select_engine(&eb, file, args);
>>>        if (!eb.engine) {
>>>                err = -EINVAL;
>>>                goto err_engine;
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 65b0b84419f3..d41b4c673af4 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1508,9 +1508,36 @@ struct drm_i915_gem_context_param {
>>>    #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
>>>    #define   I915_CONTEXT_DEFAULT_PRIORITY             0
>>>    #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
>>> +
>>> +/*
>>> + * I915_CONTEXT_PARAM_ENGINES:
>>> + *
>>> + * Bind this context to operate on this subset of available engines. Henceforth,
>>> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
>>> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
>>> + * and upwards. The array created is offset by 1, such that by default
>>> + * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
>>> + * are then filled in using the specified (class, instance).
>>
>> Should we allow specifying the default index? I am wondering if that
>> would make life easier to any userspace.
> 
> Could do, certainly gets rid of the clumsy nengines+1, but the skip for
> unknown class may make up for the clumsyness :)

Yeah I don't know. Userspace has to create a context at which point 
there isn't much to gain with the default index capability. So it's 
probably pointless.

I guess what I am thinking of is how we'll make IGT use new engines 
without too much churn, since we dropped the plain class/instance 
execbuf. But the default index idea doesn't help that anyway.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a8570a07b3b7..313471253f51 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@ 
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -223,6 +224,8 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	kfree(ctx->engines);
+
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
 
@@ -948,6 +951,87 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int set_engines(struct i915_gem_context *ctx,
+		       struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = {
+		.ctx = ctx,
+		.engines = NULL,
+		.nengine = -1,
+	};
+	unsigned int n;
+	u64 size, extensions;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	if (size < sizeof(struct i915_context_param_engines))
+		return -EINVAL;
+
+	size -= sizeof(struct i915_context_param_engines);
+	if (size % sizeof(*user->class_instance))
+		return -EINVAL;
+
+	set.nengine = size / sizeof(*user->class_instance);
+	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine + 1,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	set.engines[0] = NULL;
+	for (n = 0; n < set.nengine; n++) {
+		u32 class, instance;
+
+		if (get_user(class, &user->class_instance[n].class) ||
+		    get_user(instance, &user->class_instance[n].instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		set.engines[n + 1] =
+			intel_engine_lookup_user(ctx->i915, class, instance);
+		if (!set.engines[n + 1]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine + 1;
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -1011,6 +1095,10 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 770043449ba6..9f89119a6566 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -67,6 +67,8 @@  struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -135,6 +137,8 @@  struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 109486a6ef07..faedfdca875d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2014,13 +2014,23 @@  static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine) {
+			DRM_DEBUG("execbuf with unknown ring: %u\n",
+				  user_ring_id);
+			return NULL;
+		}
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2033,11 +2043,11 @@  eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2048,9 +2058,9 @@  eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2260,7 +2270,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 65b0b84419f3..d41b4c673af4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1508,9 +1508,36 @@  struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. The array created is offset by 1, such that by default
+ * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
+ * are then filled in using the specified (class, instance).
+ *
+ * Setting the number of engines bound to the context will revert back to
+ * default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0x7
+
 	__u64 value;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions;
+
+	struct {
+		__u32 class; /* see enum drm_i915_gem_engine_class */
+		__u32 instance;
+	} class_instance[0];
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */