Message ID | 20190412085410.10392-23-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/50] drm/i915: Introduce struct class_instance for engines across the uAPI | expand |
On 12/04/2019 09:53, Chris Wilson wrote: > Over the last few years, we have debated how to extend the user API to > support an increase in the number of engines, that may be sparse and > even be heterogeneous within a class (not all video decoders created > equal). We settled on using (class, instance) tuples to identify a > specific engine, with an API for the user to construct a map of engines > to capabilities. Into this picture, we then add a challenge of virtual > engines; one user engine that maps behind the scenes to any number of > physical engines. To keep it general, we want the user to have full > control over that mapping. To that end, we allow the user to constrain a > context to define the set of engines that it can access, order fully > controlled by the user via (class, instance). With such precise control > in context setup, we can continue to use the existing execbuf uABI of > specifying a single index; only now it doesn't automagically map onto > the engines, it uses the user defined engine map from the context. > > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by > execbuf. Its use will be revealed in the next patch. > > v2: Fixup freeing of local on success of get_engines() > v3: Allow empty engines[] > v4: s/nengine/num_engines/ > v5: Replace 64 limit on num_engines with a note that execbuf is > currently limited to only using the first 64 engines. 
> > Testcase: igt/gem_ctx_engines > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v4 > --- > drivers/gpu/drm/i915/i915_gem_context.c | 219 +++++++++++++++++- > drivers/gpu/drm/i915/i915_gem_context.h | 18 ++ > drivers/gpu/drm/i915/i915_gem_context_types.h | 8 + > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +- > drivers/gpu/drm/i915/i915_utils.h | 36 +++ > include/uapi/drm/i915_drm.h | 31 +++ > 6 files changed, 310 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 9d3324439f1b..4e9411508dec 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -90,7 +90,6 @@ > #include <drm/i915_drm.h> > > #include "gt/intel_lrc_reg.h" > -#include "gt/intel_workarounds.h" > > #include "i915_drv.h" > #include "i915_globals.h" > @@ -143,13 +142,17 @@ static void lut_close(struct i915_gem_context *ctx) > static struct intel_context * > lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance) > { > - struct intel_engine_cs *engine; > + if (!i915_gem_context_user_engines(ctx)) { > + struct intel_engine_cs *engine; > > - engine = intel_engine_lookup_user(ctx->i915, class, instance); > - if (!engine) > - return ERR_PTR(-EINVAL); > + engine = intel_engine_lookup_user(ctx->i915, class, instance); > + if (!engine) > + return ERR_PTR(-EINVAL); > + > + instance = engine->id; > + } > > - return i915_gem_context_get_engine(ctx, engine->id); > + return i915_gem_context_get_engine(ctx, instance); > } > > static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) > @@ -257,6 +260,17 @@ static void free_engines(struct i915_gem_engines *e) > __free_engines(e, e->num_engines); > } > > +static void free_engines_rcu(struct work_struct *wrk) > +{ > + struct i915_gem_engines *e = > + container_of(wrk, struct 
i915_gem_engines, rcu.work); > + struct drm_i915_private *i915 = e->i915; > + > + mutex_lock(&i915->drm.struct_mutex); > + free_engines(e); > + mutex_unlock(&i915->drm.struct_mutex); > +} > + > static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) > { > struct intel_engine_cs *engine; > @@ -1385,6 +1399,191 @@ static int set_sseu(struct i915_gem_context *ctx, > return ret; > } > > +struct set_engines { > + struct i915_gem_context *ctx; > + struct i915_gem_engines *engines; > +}; > + > +static const i915_user_extension_fn set_engines__extensions[] = { > +}; > + > +static int > +set_engines(struct i915_gem_context *ctx, > + const struct drm_i915_gem_context_param *args) > +{ > + struct i915_context_param_engines __user *user = > + u64_to_user_ptr(args->value); > + struct set_engines set = { .ctx = ctx }; > + unsigned int num_engines, n; > + u64 extensions; > + int err; > + > + if (!args->size) { /* switch back to legacy user_ring_map */ > + if (!i915_gem_context_user_engines(ctx)) > + return 0; > + > + set.engines = default_engines(ctx); > + if (IS_ERR(set.engines)) > + return PTR_ERR(set.engines); > + > + goto replace; > + } > + > + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); > + if (args->size < sizeof(*user) || > + !IS_ALIGNED(args->size, sizeof(*user->engines))) { > + DRM_DEBUG("Invalid size for engine array: %d\n", > + args->size); > + return -EINVAL; > + } > + > + /* > + * Note that I915_EXEC_RING_MASK limits execbuf to only using the > + * first 64 engines defined here. 
> + */ > + num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); > + > + set.engines = kmalloc(struct_size(set.engines, engines, num_engines), > + GFP_KERNEL); > + if (!set.engines) > + return -ENOMEM; > + > + set.engines->i915 = ctx->i915; > + for (n = 0; n < num_engines; n++) { > + struct i915_engine_class_instance ci; > + struct intel_engine_cs *engine; > + > + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { > + __free_engines(set.engines, n); > + return -EFAULT; > + } > + > + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && > + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { > + set.engines->engines[n] = NULL; > + continue; > + } > + > + engine = intel_engine_lookup_user(ctx->i915, > + ci.engine_class, > + ci.engine_instance); > + if (!engine) { > + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", Standardize early on "%u:%u" for class:instance? Although in this specific case I am not sure the debug is so useful. (There are many other ways the ioctl can fail.) Same goes for the earlier one. > + n, ci.engine_class, ci.engine_instance); > + __free_engines(set.engines, n); > + return -ENOENT; > + } > + > + set.engines->engines[n] = intel_context_create(ctx, engine); > + if (!set.engines->engines[n]) { > + __free_engines(set.engines, n); > + return -ENOMEM; > + } > + } > + set.engines->num_engines = num_engines; > + > + err = -EFAULT; > + if (!get_user(extensions, &user->extensions)) > + err = i915_user_extensions(u64_to_user_ptr(extensions), > + set_engines__extensions, > + ARRAY_SIZE(set_engines__extensions), > + &set); > + if (err) { > + free_engines(set.engines); > + return err; > + } > + > +replace: > + mutex_lock(&ctx->i915->drm.struct_mutex); > + if (args->size) > + i915_gem_context_set_user_engines(ctx); > + else > + i915_gem_context_clear_user_engines(ctx); > + rcu_swap_protected(ctx->engines, set.engines, 1); > + mutex_unlock(&ctx->i915->drm.struct_mutex); Hm so what is the new engine list mutex for? 
> + > + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); > + queue_rcu_work(system_wq, &set.engines->rcu); > + > + return 0; > +} > + > +static int > +get_engines(struct i915_gem_context *ctx, > + struct drm_i915_gem_context_param *args) > +{ > + struct i915_context_param_engines __user *user; > + struct i915_gem_engines *e; > + size_t n, count, size; > + int err = 0; > + > + err = mutex_lock_interruptible(&ctx->engines_mutex); > + if (err) > + return err; > + > + if (!i915_gem_context_user_engines(ctx)) { > + args->size = 0; > + goto unlock; > + } > + > + e = i915_gem_context_engine_list(ctx); > + count = e->num_engines; > + > + /* Be paranoid in case we have an impedance mismatch */ > + if (!check_struct_size(user, engines, count, &size)) { > + err = -ENOMEM; > + goto unlock; > + } > + if (unlikely(overflows_type(size, args->size))) { I wouldn't bother with unlikely, none of the other failure modes does. > + err = -ENOMEM; > + goto unlock; Or abuse a bit -E2BIG for the two? > + } > + > + if (!args->size) { > + args->size = size; > + goto unlock; > + } > + > + if (args->size < size) { > + err = -EINVAL; > + goto unlock; > + } > + > + user = u64_to_user_ptr(args->value); > + if (!access_ok(user, size)) { > + err = -EFAULT; > + goto unlock; > + } > + > + if (put_user(0, &user->extensions)) { > + err = -EFAULT; Hm why? > + goto unlock; > + } > + > + for (n = 0; n < count; n++) { > + struct i915_engine_class_instance ci = { > + .engine_class = I915_ENGINE_CLASS_INVALID, > + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, > + }; > + > + if (e->engines[n]) { > + ci.engine_class = e->engines[n]->engine->uabi_class; > + ci.engine_instance = e->engines[n]->engine->instance; > + } I would be tempted to put the invalid initializers into the else block. > + > + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { __copy_to_user after access_ok passed? Or there were some changes in this area? 
> + err = -EFAULT; > + goto unlock; > + } > + } > + > + args->size = size; > + > +unlock: > + mutex_unlock(&ctx->engines_mutex); > + return err; > +} > + > static int ctx_setparam(struct drm_i915_file_private *fpriv, > struct i915_gem_context *ctx, > struct drm_i915_gem_context_param *args) > @@ -1458,6 +1657,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, > ret = set_ppgtt(fpriv, ctx, args); > break; > > + case I915_CONTEXT_PARAM_ENGINES: > + ret = set_engines(ctx, args); > + break; > + > case I915_CONTEXT_PARAM_BAN_PERIOD: > default: > ret = -EINVAL; > @@ -1688,6 +1891,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, > ret = get_ppgtt(file_priv, ctx, args); > break; > > + case I915_CONTEXT_PARAM_ENGINES: > + ret = get_engines(ctx, args); > + break; > + > case I915_CONTEXT_PARAM_BAN_PERIOD: > default: > ret = -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index 25238f750286..5a82769c6fd5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -112,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_ > __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); > } > > +static inline bool > +i915_gem_context_user_engines(const struct i915_gem_context *ctx) > +{ > + return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); > +} > + > +static inline void > +i915_gem_context_set_user_engines(struct i915_gem_context *ctx) > +{ > + set_bit(CONTEXT_USER_ENGINES, &ctx->flags); > +} > + > +static inline void > +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) > +{ > + clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); > +} > + > int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); > static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) > { > diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/i915_gem_context_types.h > index 33e5a0e36e75..5284b50a4751 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context_types.h > +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h > @@ -139,6 +139,14 @@ struct i915_gem_context { > #define CONTEXT_BANNED 0 > #define CONTEXT_CLOSED 1 > #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 > +#define CONTEXT_USER_ENGINES 3 > + > + /** > + * @num_engines: Number of user defined engines for this context > + * > + * See @engines for the elements. > + */ > + unsigned int num_engines; Where is this set or used? > > /** > * @hw_id: - unique identifier for the context > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 679f7c1561ba..d6c5220addd0 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -2165,7 +2165,10 @@ eb_select_engine(struct i915_execbuffer *eb, > unsigned int idx; > int err; > > - idx = eb_select_legacy_ring(eb, file, args); > + if (i915_gem_context_user_engines(eb->gem_context)) > + idx = args->flags & I915_EXEC_RING_MASK; > + else > + idx = eb_select_legacy_ring(eb, file, args); > > ce = i915_gem_context_get_engine(eb->gem_context, idx); > if (IS_ERR(ce)) > diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h > index 2dbe8933b50a..1436fe2fb5f8 100644 > --- a/drivers/gpu/drm/i915/i915_utils.h > +++ b/drivers/gpu/drm/i915/i915_utils.h > @@ -25,6 +25,9 @@ > #ifndef __I915_UTILS_H > #define __I915_UTILS_H > > +#include <linux/kernel.h> > +#include <linux/overflow.h> > + > #undef WARN_ON > /* Many gcc seem to no see through this and fall over :( */ > #if 0 > @@ -73,6 +76,39 @@ > #define overflows_type(x, T) \ > (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) > > +static inline bool > +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size) > +{ > + size_t sz; > + > + if (check_mul_overflow(count, arr, &sz)) > + return false; > + > + if 
(check_add_overflow(sz, base, &sz)) > + return false; > + > + *size = sz; > + return true; > +} > + > +/** > + * check_struct_size() - Calculate size of structure with trailing array. > + * @p: Pointer to the structure. > + * @member: Name of the array member. > + * @n: Number of elements in the array. > + * @sz: Total size of structure and array > + * > + * Calculates size of memory needed for structure @p followed by an > + * array of @n @member elements, like struct_size() but reports > + * whether it overflowed, and the resultant size in @sz > + * > + * Return: false if the calculation overflowed. > + */ > +#define check_struct_size(p, member, n, sz) \ > + likely(__check_struct_size(sizeof(*(p)), \ > + sizeof(*(p)->member) + __must_be_array((p)->member), \ > + n, sz)) > + > #define ptr_mask_bits(ptr, n) ({ \ > unsigned long __v = (unsigned long)(ptr); \ > (typeof(ptr))(__v & -BIT(n)); \ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index d6ad4a15b2b9..8e1bb22926e4 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class { > struct i915_engine_class_instance { > __u16 engine_class; /* see enum drm_i915_gem_engine_class */ > __u16 engine_instance; > +#define I915_ENGINE_CLASS_INVALID_NONE -1 > }; > > /** > @@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param { > * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. > */ > #define I915_CONTEXT_PARAM_VM 0x9 > + > +/* > + * I915_CONTEXT_PARAM_ENGINES: > + * > + * Bind this context to operate on this subset of available engines. Henceforth, > + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as > + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] > + * and upwards. Slots 0...N are filled in using the specified (class, instance). 
> + * Use > + * engine_class: I915_ENGINE_CLASS_INVALID, > + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE > + * to specify a gap in the array that can be filled in later, e.g. by a > + * virtual engine used for load balancing. > + * > + * Setting the number of engines bound to the context to 0, by passing a zero > + * sized argument, will revert back to default settings. > + * > + * See struct i915_context_param_engines. > + */ > +#define I915_CONTEXT_PARAM_ENGINES 0xa > /* Must be kept compact -- no holes and well documented */ > > __u64 value; > @@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu { > __u32 rsvd; > }; > > +struct i915_context_param_engines { > + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ > + struct i915_engine_class_instance engines[0]; > +} __attribute__((packed)); > + > +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ > + __u64 extensions; \ > + struct i915_engine_class_instance engines[N__]; \ > +} __attribute__((packed)) name__ > + > struct drm_i915_gem_context_create_ext_setparam { > #define I915_CONTEXT_CREATE_EXT_SETPARAM 0 > struct i915_user_extension base; > Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-04-15 13:19:45) > > + set.engines->i915 = ctx->i915; > > + for (n = 0; n < num_engines; n++) { > > + struct i915_engine_class_instance ci; > > + struct intel_engine_cs *engine; > > + > > + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { > > + __free_engines(set.engines, n); > > + return -EFAULT; > > + } > > + > > + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && > > + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { > > + set.engines->engines[n] = NULL; > > + continue; > > + } > > + > > + engine = intel_engine_lookup_user(ctx->i915, > > + ci.engine_class, > > + ci.engine_instance); > > + if (!engine) { > > + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", > > Standardize early on "%u:%u" for class:instance? Although in this > specific case I am not sure the debug is so useful. (There are many other > ways the ioctl can fail.) Same goes for the earlier one. I'm quietly adopting yaml inside messages :) It's the -EINVAL that are a nightmare. -EFAULT, -ENOMEM, other specialised errno are easy to recognise, but -EINVAL is anything and usually what devs hit in practice. 
I can take out the DRM_DEBUG again -- I really don't like using dmesg for user error messages :) > > + n, ci.engine_class, ci.engine_instance); > > + __free_engines(set.engines, n); > > + return -ENOENT; > > + } > > + > > + set.engines->engines[n] = intel_context_create(ctx, engine); > > + if (!set.engines->engines[n]) { > > + __free_engines(set.engines, n); > > + return -ENOMEM; > > + } > > + } > > + set.engines->num_engines = num_engines; > > + > > + err = -EFAULT; > > + if (!get_user(extensions, &user->extensions)) > > + err = i915_user_extensions(u64_to_user_ptr(extensions), > > + set_engines__extensions, > > + ARRAY_SIZE(set_engines__extensions), > > + &set); > > + if (err) { > > + free_engines(set.engines); > > + return err; > > + } > > + > > +replace: > > + mutex_lock(&ctx->i915->drm.struct_mutex); > > + if (args->size) > > + i915_gem_context_set_user_engines(ctx); > > + else > > + i915_gem_context_clear_user_engines(ctx); > > + rcu_swap_protected(ctx->engines, set.engines, 1); > > + mutex_unlock(&ctx->i915->drm.struct_mutex); > > Hm so what is the new engine list mutex for? I missed replacing this with engine_mutex. It used to be required for execbuf serialisation, however that is fine now with the RCU lookup so we only need mutex around the assignment and prolonged access. 
> > + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); > > + queue_rcu_work(system_wq, &set.engines->rcu); > > + > > + return 0; > > +} > > + > > +static int > > +get_engines(struct i915_gem_context *ctx, > > + struct drm_i915_gem_context_param *args) > > +{ > > + struct i915_context_param_engines __user *user; > > + struct i915_gem_engines *e; > > + size_t n, count, size; > > + int err = 0; > > + > > + err = mutex_lock_interruptible(&ctx->engines_mutex); > > + if (err) > > + return err; > > + > > + if (!i915_gem_context_user_engines(ctx)) { > > + args->size = 0; > > + goto unlock; > > + } > > + > > + e = i915_gem_context_engine_list(ctx); > > + count = e->num_engines; > > + > > + /* Be paranoid in case we have an impedance mismatch */ > > + if (!check_struct_size(user, engines, count, &size)) { > > + err = -ENOMEM; > > + goto unlock; > > + } > > + if (unlikely(overflows_type(size, args->size))) { > > I wouldn't bother with unlikely, none of the other failure modes does. check_struct_size is unlikely, we can just push it into overflows_type I guess. > > > + err = -ENOMEM; > > + goto unlock; > > Or abuse a bit -E2BIG for the two? No, the other choice here is -EINVAL. > > + } > > + > > + if (!args->size) { > > + args->size = size; > > + goto unlock; > > + } > > + > > + if (args->size < size) { > > + err = -EINVAL; > > + goto unlock; > > + } > > + > > + user = u64_to_user_ptr(args->value); > > + if (!access_ok(user, size)) { > > + err = -EFAULT; > > + goto unlock; > > + } > > + > > + if (put_user(0, &user->extensions)) { > > + err = -EFAULT; > > Hm why? Why an error for failing to write into the user struct? Or why are there no extensions? 
> > + goto unlock; > > + } > > + > > + for (n = 0; n < count; n++) { > > + struct i915_engine_class_instance ci = { > > + .engine_class = I915_ENGINE_CLASS_INVALID, > > + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, > > + }; > > + > > + if (e->engines[n]) { > > + ci.engine_class = e->engines[n]->engine->uabi_class; > > + ci.engine_instance = e->engines[n]->engine->instance; > > + } > > I would be tempted to put the invalid initializers into the else block. I'd be tempted too, but I liked the look of the 3 stanzas :) > > + > > + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { > > __copy_to_user after access_ok passed? Or there were some changes in > this area? Just not expecting a hot ioctl, so I wasn't worried about a bit of redundancy so chose not to argue why the "unsafe" variants are ok. > > diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h > > index 33e5a0e36e75..5284b50a4751 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_context_types.h > > +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h > > @@ -139,6 +139,14 @@ struct i915_gem_context { > > #define CONTEXT_BANNED 0 > > #define CONTEXT_CLOSED 1 > > #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 > > +#define CONTEXT_USER_ENGINES 3 > > + > > + /** > > + * @num_engines: Number of user defined engines for this context > > + * > > + * See @engines for the elements. > > + */ > > + unsigned int num_engines; > > Where is this set or used? Tasty leftovers. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9d3324439f1b..4e9411508dec 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -90,7 +90,6 @@ #include <drm/i915_drm.h> #include "gt/intel_lrc_reg.h" -#include "gt/intel_workarounds.h" #include "i915_drv.h" #include "i915_globals.h" @@ -143,13 +142,17 @@ static void lut_close(struct i915_gem_context *ctx) static struct intel_context * lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance) { - struct intel_engine_cs *engine; + if (!i915_gem_context_user_engines(ctx)) { + struct intel_engine_cs *engine; - engine = intel_engine_lookup_user(ctx->i915, class, instance); - if (!engine) - return ERR_PTR(-EINVAL); + engine = intel_engine_lookup_user(ctx->i915, class, instance); + if (!engine) + return ERR_PTR(-EINVAL); + + instance = engine->id; + } - return i915_gem_context_get_engine(ctx, engine->id); + return i915_gem_context_get_engine(ctx, instance); } static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) @@ -257,6 +260,17 @@ static void free_engines(struct i915_gem_engines *e) __free_engines(e, e->num_engines); } +static void free_engines_rcu(struct work_struct *wrk) +{ + struct i915_gem_engines *e = + container_of(wrk, struct i915_gem_engines, rcu.work); + struct drm_i915_private *i915 = e->i915; + + mutex_lock(&i915->drm.struct_mutex); + free_engines(e); + mutex_unlock(&i915->drm.struct_mutex); +} + static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { struct intel_engine_cs *engine; @@ -1385,6 +1399,191 @@ static int set_sseu(struct i915_gem_context *ctx, return ret; } +struct set_engines { + struct i915_gem_context *ctx; + struct i915_gem_engines *engines; +}; + +static const i915_user_extension_fn set_engines__extensions[] = { +}; + +static int +set_engines(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + struct 
i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + struct set_engines set = { .ctx = ctx }; + unsigned int num_engines, n; + u64 extensions; + int err; + + if (!args->size) { /* switch back to legacy user_ring_map */ + if (!i915_gem_context_user_engines(ctx)) + return 0; + + set.engines = default_engines(ctx); + if (IS_ERR(set.engines)) + return PTR_ERR(set.engines); + + goto replace; + } + + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size, sizeof(*user->engines))) { + DRM_DEBUG("Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + /* + * Note that I915_EXEC_RING_MASK limits execbuf to only using the + * first 64 engines defined here. + */ + num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + + set.engines = kmalloc(struct_size(set.engines, engines, num_engines), + GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + set.engines->i915 = ctx->i915; + for (n = 0; n < num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + __free_engines(set.engines, n); + return -EFAULT; + } + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { + set.engines->engines[n] = NULL; + continue; + } + + engine = intel_engine_lookup_user(ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + __free_engines(set.engines, n); + return -ENOENT; + } + + set.engines->engines[n] = intel_context_create(ctx, engine); + if (!set.engines->engines[n]) { + __free_engines(set.engines, n); + return -ENOMEM; + } + } + set.engines->num_engines = num_engines; + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = 
i915_user_extensions(u64_to_user_ptr(extensions), + set_engines__extensions, + ARRAY_SIZE(set_engines__extensions), + &set); + if (err) { + free_engines(set.engines); + return err; + } + +replace: + mutex_lock(&ctx->i915->drm.struct_mutex); + if (args->size) + i915_gem_context_set_user_engines(ctx); + else + i915_gem_context_clear_user_engines(ctx); + rcu_swap_protected(ctx->engines, set.engines, 1); + mutex_unlock(&ctx->i915->drm.struct_mutex); + + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &set.engines->rcu); + + return 0; +} + +static int +get_engines(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user; + struct i915_gem_engines *e; + size_t n, count, size; + int err = 0; + + err = mutex_lock_interruptible(&ctx->engines_mutex); + if (err) + return err; + + if (!i915_gem_context_user_engines(ctx)) { + args->size = 0; + goto unlock; + } + + e = i915_gem_context_engine_list(ctx); + count = e->num_engines; + + /* Be paranoid in case we have an impedance mismatch */ + if (!check_struct_size(user, engines, count, &size)) { + err = -ENOMEM; + goto unlock; + } + if (unlikely(overflows_type(size, args->size))) { + err = -ENOMEM; + goto unlock; + } + + if (!args->size) { + args->size = size; + goto unlock; + } + + if (args->size < size) { + err = -EINVAL; + goto unlock; + } + + user = u64_to_user_ptr(args->value); + if (!access_ok(user, size)) { + err = -EFAULT; + goto unlock; + } + + if (put_user(0, &user->extensions)) { + err = -EFAULT; + goto unlock; + } + + for (n = 0; n < count; n++) { + struct i915_engine_class_instance ci = { + .engine_class = I915_ENGINE_CLASS_INVALID, + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, + }; + + if (e->engines[n]) { + ci.engine_class = e->engines[n]->engine->uabi_class; + ci.engine_instance = e->engines[n]->engine->instance; + } + + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { + err = -EFAULT; + goto unlock; + 
} + } + + args->size = size; + +unlock: + mutex_unlock(&ctx->engines_mutex); + return err; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1458,6 +1657,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_ppgtt(fpriv, ctx, args); break; + case I915_CONTEXT_PARAM_ENGINES: + ret = set_engines(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1688,6 +1891,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; + case I915_CONTEXT_PARAM_ENGINES: + ret = get_engines(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 25238f750286..5a82769c6fd5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -112,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_ __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); } +static inline bool +i915_gem_context_user_engines(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_set_user_engines(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index 33e5a0e36e75..5284b50a4751 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -139,6 +139,14 
@@ struct i915_gem_context { #define CONTEXT_BANNED 0 #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 +#define CONTEXT_USER_ENGINES 3 + + /** + * @num_engines: Number of user defined engines for this context + * + * See @engines for the elements. + */ + unsigned int num_engines; /** * @hw_id: - unique identifier for the context diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 679f7c1561ba..d6c5220addd0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2165,7 +2165,10 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int idx; int err; - idx = eb_select_legacy_ring(eb, file, args); + if (i915_gem_context_user_engines(eb->gem_context)) + idx = args->flags & I915_EXEC_RING_MASK; + else + idx = eb_select_legacy_ring(eb, file, args); ce = i915_gem_context_get_engine(eb->gem_context, idx); if (IS_ERR(ce)) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 2dbe8933b50a..1436fe2fb5f8 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,9 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#include <linux/kernel.h> +#include <linux/overflow.h> + #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ #if 0 @@ -73,6 +76,39 @@ #define overflows_type(x, T) \ (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) +static inline bool +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size) +{ + size_t sz; + + if (check_mul_overflow(count, arr, &sz)) + return false; + + if (check_add_overflow(sz, base, &sz)) + return false; + + *size = sz; + return true; +} + +/** + * check_struct_size() - Calculate size of structure with trailing array. + * @p: Pointer to the structure. + * @member: Name of the array member. + * @n: Number of elements in the array. 
+ * @sz: Total size of structure and array + * + * Calculates size of memory needed for structure @p followed by an + * array of @n @member elements, like struct_size() but reports + * whether it overflowed, and the resultant size in @sz + * + * Return: false if the calculation overflowed. + */ +#define check_struct_size(p, member, n, sz) \ + likely(__check_struct_size(sizeof(*(p)), \ + sizeof(*(p)->member) + __must_be_array((p)->member), \ + n, sz)) + #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d6ad4a15b2b9..8e1bb22926e4 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class { struct i915_engine_class_instance { __u16 engine_class; /* see enum drm_i915_gem_engine_class */ __u16 engine_instance; +#define I915_ENGINE_CLASS_INVALID_NONE -1 }; /** @@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param { * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. */ #define I915_CONTEXT_PARAM_VM 0x9 + +/* + * I915_CONTEXT_PARAM_ENGINES: + * + * Bind this context to operate on this subset of available engines. Henceforth, + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] + * and upwards. Slots 0...N are filled in using the specified (class, instance). + * Use + * engine_class: I915_ENGINE_CLASS_INVALID, + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE + * to specify a gap in the array that can be filled in later, e.g. by a + * virtual engine used for load balancing. + * + * Setting the number of engines bound to the context to 0, by passing a zero + * sized argument, will revert back to default settings. + * + * See struct i915_context_param_engines. 
+ */ +#define I915_CONTEXT_PARAM_ENGINES 0xa /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +struct i915_context_param_engines { + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ + __u64 extensions; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + struct drm_i915_gem_context_create_ext_setparam { #define I915_CONTEXT_CREATE_EXT_SETPARAM 0 struct i915_user_extension base;