Message ID | 20190408091728.20207-21-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/29] drm/i915: Mark up ips for RCU protection | expand |
On 08/04/2019 10:17, Chris Wilson wrote: > We switched to a tree of per-engine HW context to accommodate the > introduction of virtual engines. However, we plan to also support > multiple instances of the same engine within the GEM context, defeating > our use of the engine as a key to looking up the HW context. Just > allocate a logical per-engine instance and always use an index into the > ctx->engines[]. Later on, this ctx->engines[] may be replaced by a user > specified map. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/gt/intel_context.c | 97 ++----------------- > drivers/gpu/drm/i915/gt/intel_context.h | 25 +---- > drivers/gpu/drm/i915/gt/intel_context_types.h | 2 - > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- > drivers/gpu/drm/i915/gt/mock_engine.c | 3 +- > drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- > drivers/gpu/drm/i915/i915_gem.c | 29 +++--- > drivers/gpu/drm/i915/i915_gem_context.c | 92 +++++++++++++++--- > drivers/gpu/drm/i915/i915_gem_context.h | 42 ++++++++ > drivers/gpu/drm/i915/i915_gem_context_types.h | 26 ++++- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 70 ++++++------- > drivers/gpu/drm/i915/i915_perf.c | 81 +++++++++------- > drivers/gpu/drm/i915/i915_request.c | 2 +- > drivers/gpu/drm/i915/intel_guc_submission.c | 24 +++-- > .../gpu/drm/i915/selftests/i915_gem_context.c | 2 +- > drivers/gpu/drm/i915/selftests/mock_context.c | 14 ++- > 16 files changed, 283 insertions(+), 230 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index 46bf8d8fb7e4..803ac9a1dd15 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -17,7 +17,7 @@ static struct i915_global_context { > struct kmem_cache *slab_ce; > } global; > > -struct intel_context *intel_context_alloc(void) > +static struct intel_context *intel_context_alloc(void) > { > return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); > } > @@ 
-28,104 +28,17 @@ void intel_context_free(struct intel_context *ce) > } > > struct intel_context * > -intel_context_lookup(struct i915_gem_context *ctx, > +intel_context_create(struct i915_gem_context *ctx, > struct intel_engine_cs *engine) > { > - struct intel_context *ce = NULL; > - struct rb_node *p; > - > - spin_lock(&ctx->hw_contexts_lock); > - p = ctx->hw_contexts.rb_node; > - while (p) { > - struct intel_context *this = > - rb_entry(p, struct intel_context, node); > - > - if (this->engine == engine) { > - GEM_BUG_ON(this->gem_context != ctx); > - ce = this; > - break; > - } > - > - if (this->engine < engine) > - p = p->rb_right; > - else > - p = p->rb_left; > - } > - spin_unlock(&ctx->hw_contexts_lock); > - > - return ce; > -} > - > -struct intel_context * > -__intel_context_insert(struct i915_gem_context *ctx, > - struct intel_engine_cs *engine, > - struct intel_context *ce) > -{ > - struct rb_node **p, *parent; > - int err = 0; > - > - spin_lock(&ctx->hw_contexts_lock); > - > - parent = NULL; > - p = &ctx->hw_contexts.rb_node; > - while (*p) { > - struct intel_context *this; > - > - parent = *p; > - this = rb_entry(parent, struct intel_context, node); > - > - if (this->engine == engine) { > - err = -EEXIST; > - ce = this; > - break; > - } > - > - if (this->engine < engine) > - p = &parent->rb_right; > - else > - p = &parent->rb_left; > - } > - if (!err) { > - rb_link_node(&ce->node, parent, p); > - rb_insert_color(&ce->node, &ctx->hw_contexts); > - } > - > - spin_unlock(&ctx->hw_contexts_lock); > - > - return ce; > -} > - > -void __intel_context_remove(struct intel_context *ce) > -{ > - struct i915_gem_context *ctx = ce->gem_context; > - > - spin_lock(&ctx->hw_contexts_lock); > - rb_erase(&ce->node, &ctx->hw_contexts); > - spin_unlock(&ctx->hw_contexts_lock); > -} > - > -struct intel_context * > -intel_context_instance(struct i915_gem_context *ctx, > - struct intel_engine_cs *engine) > -{ > - struct intel_context *ce, *pos; > - > - ce = 
intel_context_lookup(ctx, engine); > - if (likely(ce)) > - return intel_context_get(ce); > + struct intel_context *ce; > > ce = intel_context_alloc(); > if (!ce) > return ERR_PTR(-ENOMEM); > > intel_context_init(ce, ctx, engine); > - > - pos = __intel_context_insert(ctx, engine, ce); > - if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */ > - intel_context_free(ce); > - > - GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); > - return intel_context_get(pos); > + return ce; > } > > int __intel_context_do_pin(struct intel_context *ce) > @@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce, > struct i915_gem_context *ctx, > struct intel_engine_cs *engine) > { > + GEM_BUG_ON(!engine->cops); > + > kref_init(&ce->ref); > > ce->gem_context = ctx; > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h > index 4f8ef45e6344..567a415be513 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.h > +++ b/drivers/gpu/drm/i915/gt/intel_context.h > @@ -12,24 +12,16 @@ > #include "intel_context_types.h" > #include "intel_engine_types.h" > > -struct intel_context *intel_context_alloc(void); > -void intel_context_free(struct intel_context *ce); > - > void intel_context_init(struct intel_context *ce, > struct i915_gem_context *ctx, > struct intel_engine_cs *engine); > > -/** > - * intel_context_lookup - Find the matching HW context for this (ctx, engine) > - * @ctx - the parent GEM context > - * @engine - the target HW engine > - * > - * May return NULL if the HW context hasn't been instantiated (i.e. unused). 
> - */ > struct intel_context * > -intel_context_lookup(struct i915_gem_context *ctx, > +intel_context_create(struct i915_gem_context *ctx, > struct intel_engine_cs *engine); > > +void intel_context_free(struct intel_context *ce); > + > /** > * intel_context_pin_lock - Stablises the 'pinned' status of the HW context > * @ctx - the parent GEM context > @@ -59,17 +51,6 @@ intel_context_is_pinned(struct intel_context *ce) > > void intel_context_pin_unlock(struct intel_context *ce); > > -struct intel_context * > -__intel_context_insert(struct i915_gem_context *ctx, > - struct intel_engine_cs *engine, > - struct intel_context *ce); > -void > -__intel_context_remove(struct intel_context *ce); > - > -struct intel_context * > -intel_context_instance(struct i915_gem_context *ctx, > - struct intel_engine_cs *engine); > - > int __intel_context_do_pin(struct intel_context *ce); > > static inline int intel_context_pin(struct intel_context *ce) > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h > index f02d27734e3b..3579c2708321 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h > @@ -10,7 +10,6 @@ > #include <linux/kref.h> > #include <linux/list.h> > #include <linux/mutex.h> > -#include <linux/rbtree.h> > #include <linux/types.h> > > #include "i915_active_types.h" > @@ -61,7 +60,6 @@ struct intel_context { > struct i915_active_request active_tracker; > > const struct intel_context_ops *ops; > - struct rb_node node; > > /** sseu: Control eu/slice partitioning */ > struct intel_sseu sseu; > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 3f794bc71958..0df3c5238c04 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx, > struct intel_context *ce; > int err; > > - ce = 
intel_context_instance(ctx, engine); > + ce = i915_gem_context_get_engine(ctx, engine->id); Staying with intel_context_instance wouldn't help to reduce the churn? > if (IS_ERR(ce)) > return PTR_ERR(ce); > > diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c > index 3b672e011cf0..87e538825fee 100644 > --- a/drivers/gpu/drm/i915/gt/mock_engine.c > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c > @@ -23,6 +23,7 @@ > */ > > #include "i915_drv.h" > +#include "i915_gem_context.h" > #include "intel_context.h" > #include "intel_engine_pm.h" > > @@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine) > i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); > > engine->kernel_context = > - intel_context_instance(i915->kernel_context, engine); > + i915_gem_context_get_engine(i915->kernel_context, engine->id); > if (IS_ERR(engine->kernel_context)) > goto err_timeline; > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index 606fc2713240..8b6574e1b495 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -1188,7 +1188,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) > INIT_LIST_HEAD(&s->workload_q_head[i]); > s->shadow[i] = ERR_PTR(-EINVAL); > > - ce = intel_context_instance(ctx, engine); > + ce = i915_gem_context_get_engine(ctx, i); > if (IS_ERR(ce)) { > ret = PTR_ERR(ce); > goto out_shadow_ctx; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 50266e87c225..21b4a04c424b 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) > > static int __intel_engines_record_defaults(struct drm_i915_private *i915) > { > - struct i915_gem_context *ctx; > struct intel_engine_cs *engine; > + struct i915_gem_context *ctx; > + struct i915_gem_engines *e; > enum intel_engine_id id; 
> int err = 0; > > @@ -4330,18 +4331,26 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) > if (IS_ERR(ctx)) > return PTR_ERR(ctx); > > + e = i915_gem_context_engine_list_lock(ctx); > + > for_each_engine(engine, i915, id) { Do we need i915 version of for_each_context_engine? If all call sites will doing this "lock ctx" -> "walk physical engines" -> "lookup in context" then it seems a bit disconnected. > + struct intel_context *ce = e->engines[id]; How will index by engine->id work for engine map? > struct i915_request *rq; > > - rq = i915_request_alloc(engine, ctx); > + err = intel_context_pin(ce); > + if (err) > + goto err_active; > + > + rq = i915_request_create(ce); > + intel_context_unpin(ce); Kind of verbose, no? Do you want to add some middle-ground-between-request-alloc-and-create helper? > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > - goto out_ctx; > + goto err_active; > } > > err = 0; > - if (engine->init_context) > - err = engine->init_context(rq); > + if (rq->engine->init_context) > + err = rq->engine->init_context(rq); > > i915_request_add(rq); > if (err) > @@ -4355,15 +4364,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) > } > > for_each_engine(engine, i915, id) { > - struct intel_context *ce; > - struct i915_vma *state; > + struct intel_context *ce = e->engines[id]; > + struct i915_vma *state = ce->state; > void *vaddr; > > - ce = intel_context_lookup(ctx, engine); > - if (!ce) > - continue; > - > - state = ce->state; > if (!state) > continue; > > @@ -4419,6 +4423,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) > } > > out_ctx: > + i915_gem_context_engine_list_unlock(ctx); > i915_gem_context_set_closed(ctx); > i915_gem_context_put(ctx); > return err; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 1e84fe0a4c55..517816558c85 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ 
b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance) > if (!engine) > return ERR_PTR(-EINVAL); > > - return intel_context_instance(ctx, engine); > + return i915_gem_context_get_engine(ctx, engine->id); > } > > static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) > @@ -242,10 +242,54 @@ static void release_hw_id(struct i915_gem_context *ctx) > mutex_unlock(&i915->contexts.mutex); > } > > -static void i915_gem_context_free(struct i915_gem_context *ctx) > +static void free_engines(struct i915_gem_engines *e) > +{ > + unsigned int n; > + > + for (n = 0; n < e->num_engines; n++) { > + if (!e->engines[n]) > + continue; > + > + intel_context_put(e->engines[n]); > + } > + kfree(e); > +} > + > +static void free_engines_n(struct i915_gem_engines *e, unsigned long n) > { > - struct intel_context *it, *n; > + e->num_engines = n; > + free_engines(e); > +} > + > +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) > +{ > + struct intel_engine_cs *engine; > + struct i915_gem_engines *e; > + enum intel_engine_id id; > + > + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); > + if (!e) > + return ERR_PTR(-ENOMEM); > + > + e->i915 = ctx->i915; > + for_each_engine(engine, ctx->i915, id) { > + struct intel_context *ce; > > + ce = intel_context_create(ctx, engine); > + if (IS_ERR(ce)) { > + free_engines_n(e, id); I dislike piggy-back of n into e. How about: __free_engines(e, n) { ... } free_engines(e) { __free_engines(e, e->num_engines); } ? Or even you could e->num_engines++ in the above loop and just have one free_engines. > + return ERR_CAST(ce); > + } > + > + e->engines[id] = ce; Each context would have a sparse array of engines, on most platforms. Would it be workable to instead create a compact array per context, and just have a per device translation table of idx to engine->id? 
Or vice-versa, I can't figure out straight from the bat which one would you need. I guess in this scheme you would need some translation as well to support customised engine maps.. will see if I will spot that later in the patch. > + } > + e->num_engines = id; > + > + return e; > +} > + > +static void i915_gem_context_free(struct i915_gem_context *ctx) > +{ > lockdep_assert_held(&ctx->i915->drm.struct_mutex); > GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); > GEM_BUG_ON(!list_empty(&ctx->active_engines)); > @@ -253,8 +297,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) > release_hw_id(ctx); > i915_ppgtt_put(ctx->ppgtt); > > - rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node) > - intel_context_put(it); > + free_engines(rcu_access_pointer(ctx->engines)); > + mutex_destroy(&ctx->engines_mutex); > > if (ctx->timeline) > i915_timeline_put(ctx->timeline); > @@ -363,6 +407,8 @@ static struct i915_gem_context * > __create_context(struct drm_i915_private *dev_priv) > { > struct i915_gem_context *ctx; > + struct i915_gem_engines *e; > + int err; > int i; > > ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); > @@ -376,8 +422,13 @@ __create_context(struct drm_i915_private *dev_priv) > INIT_LIST_HEAD(&ctx->active_engines); > mutex_init(&ctx->mutex); > > - ctx->hw_contexts = RB_ROOT; > - spin_lock_init(&ctx->hw_contexts_lock); > + mutex_init(&ctx->engines_mutex); > + e = default_engines(ctx); > + if (IS_ERR(e)) { > + err = PTR_ERR(e); > + goto err_free; > + } > + RCU_INIT_POINTER(ctx->engines, e); > > INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); > INIT_LIST_HEAD(&ctx->handles_list); > @@ -399,6 +450,10 @@ __create_context(struct drm_i915_private *dev_priv) > ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; > > return ctx; > + > +err_free: > + kfree(ctx); > + return ERR_PTR(err); > } > > static struct i915_hw_ppgtt * > @@ -862,7 +917,8 @@ static int context_barrier_task(struct i915_gem_context *ctx, > { > struct drm_i915_private *i915 
= ctx->i915; > struct context_barrier_task *cb; > - struct intel_context *ce, *next; > + struct i915_gem_engines *e; > + unsigned int i; > int err = 0; > > lockdep_assert_held(&i915->drm.struct_mutex); > @@ -875,20 +931,29 @@ static int context_barrier_task(struct i915_gem_context *ctx, > i915_active_init(i915, &cb->base, cb_retire); > i915_active_acquire(&cb->base); > > - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { > - struct intel_engine_cs *engine = ce->engine; > + e = i915_gem_context_engine_list_lock(ctx); > + for (i = 0; i < e->num_engines; i++) { > + struct intel_context *ce = e->engines[i]; > struct i915_request *rq; > > - if (!(engine->mask & engines)) > + if (!ce || !(ce->engine->mask & engines)) Definitely looks like a case for for_each_context_engine. > + continue; > + > + if (!intel_context_is_pinned(ce)) > continue; > > if (I915_SELFTEST_ONLY(context_barrier_inject_fault & > - engine->mask)) { > + ce->engine->mask)) { > err = -ENXIO; > break; > } > > - rq = i915_request_alloc(engine, ctx); > + err = intel_context_pin(ce); > + if (err) > + break; > + > + rq = i915_request_create(ce); > + intel_context_unpin(ce); Yep as mentioned somewhere above. I definitely think another helper would help code base readability. Even if called unimaginatively as __i915_request_add. > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > break; > @@ -904,6 +969,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, > if (err) > break; > } > + i915_gem_context_engine_list_unlock(ctx); > > cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ > cb->data = data; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index 5a8e080499fb..d94fc4c1907c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -176,6 +176,48 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) > kref_put(&ctx->ref, i915_gem_context_release); > } > > +static inline struct i915_gem_engines * > +i915_gem_context_engine_list(struct i915_gem_context *ctx) > +{ > + return rcu_dereference_protected(ctx->engines, > + lockdep_is_held(&ctx->engines_mutex)); > +} > + > +static inline struct i915_gem_engines * > +i915_gem_context_engine_list_lock(struct i915_gem_context *ctx) > + __acquires(&ctx->engines_mutex) > +{ > + mutex_lock(&ctx->engines_mutex); > + return i915_gem_context_engine_list(ctx); > +} > + > +static inline void > +i915_gem_context_engine_list_unlock(struct i915_gem_context *ctx) > + __releases(&ctx->engines_mutex) > +{ > + mutex_unlock(&ctx->engines_mutex); > +} > + > +static inline struct intel_context * > +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned long idx) > +{ > + return i915_gem_context_engine_list(ctx)->engines[idx]; > +} > + > +static inline struct intel_context * > +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned long idx) > +{ > + struct intel_context *ce = ERR_PTR(-EINVAL); > + > + rcu_read_lock(); { > + struct i915_gem_engines *e = rcu_dereference(ctx->engines); > + if (likely(idx < e->num_engines && e->engines[idx])) > + ce = intel_context_get(e->engines[idx]); > + } rcu_read_unlock(); > + > + return ce; > +} > + > struct i915_lut_handle *i915_lut_handle_alloc(void); > void i915_lut_handle_free(struct i915_lut_handle *lut); > > diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h > index d282a6ab3b9f..1610a74b0283 100644 > --- 
a/drivers/gpu/drm/i915/i915_gem_context_types.h > +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h > @@ -29,6 +29,13 @@ struct i915_hw_ppgtt; > struct i915_timeline; > struct intel_ring; > > +struct i915_gem_engines { > + struct rcu_work rcu; > + struct drm_i915_private *i915; > + unsigned long num_engines; unsigned int? > + struct intel_context *engines[]; > +}; > + > /** > * struct i915_gem_context - client state > * > @@ -42,6 +49,21 @@ struct i915_gem_context { > /** file_priv: owning file descriptor */ > struct drm_i915_file_private *file_priv; > > + /** > + * @engines: User defined engines for this context > + * > + * NULL means to use legacy definitions (including random meaning of > + * I915_EXEC_BSD with I915_EXEC_BSD_SELECTOR overrides). > + * > + * If defined, execbuf uses the I915_EXEC_MASK as an index into > + * array, and various uAPI other the ability to lookup up an > + * index from this array to select an engine operate on. > + * > + * User defined by I915_CONTEXT_PARAM_ENGINE. 
> + */ > + struct i915_gem_engines __rcu *engines; > + struct mutex engines_mutex; /* guards writes to engines */ > + > struct i915_timeline *timeline; > > /** > @@ -134,10 +156,6 @@ struct i915_gem_context { > > struct i915_sched_attr sched; > > - /** hw_contexts: per-engine logical HW state */ > - struct rb_root hw_contexts; > - spinlock_t hw_contexts_lock; > - > /** ring_size: size for allocating the per-engine ring buffer */ > u32 ring_size; > /** desc_template: invariant fields for the HW context descriptor */ > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index c700cbc2f594..941192c60832 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -2077,9 +2077,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, > return file_priv->bsd_engine; > } > > -#define I915_USER_RINGS (4) > - > -static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { > +static const enum intel_engine_id user_ring_map[] = { > [I915_EXEC_DEFAULT] = RCS0, > [I915_EXEC_RENDER] = RCS0, > [I915_EXEC_BLT] = BCS0, > @@ -2087,10 +2085,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { > [I915_EXEC_VEBOX] = VECS0 > }; > > -static int eb_pin_context(struct i915_execbuffer *eb, > - struct intel_engine_cs *engine) > +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) > { > - struct intel_context *ce; > int err; > > /* > @@ -2101,21 +2097,16 @@ static int eb_pin_context(struct i915_execbuffer *eb, > if (err) > return err; > > - ce = intel_context_instance(eb->gem_context, engine); > - if (IS_ERR(ce)) > - return PTR_ERR(ce); > - > /* > * Pinning the contexts may generate requests in order to acquire > * GGTT space, so do this first before we reserve a seqno for > * ourselves. 
> */ > err = intel_context_pin(ce); > - intel_context_put(ce); > if (err) > return err; > > - eb->engine = engine; > + eb->engine = ce->engine; > eb->context = ce; > return 0; > } > @@ -2125,24 +2116,18 @@ static void eb_unpin_context(struct i915_execbuffer *eb) > intel_context_unpin(eb->context); > } > > -static int > -eb_select_engine(struct i915_execbuffer *eb, > - struct drm_file *file, > - struct drm_i915_gem_execbuffer2 *args) > +static unsigned int > +eb_select_legacy_ring(struct i915_execbuffer *eb, > + struct drm_file *file, > + struct drm_i915_gem_execbuffer2 *args) > { > unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; > - struct intel_engine_cs *engine; > - > - if (user_ring_id > I915_USER_RINGS) { > - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); > - return -EINVAL; > - } > > - if ((user_ring_id != I915_EXEC_BSD) && > - ((args->flags & I915_EXEC_BSD_MASK) != 0)) { > + if (user_ring_id != I915_EXEC_BSD && > + (args->flags & I915_EXEC_BSD_MASK)) { > DRM_DEBUG("execbuf with non bsd ring but with invalid " > "bsd dispatch flags: %d\n", (int)(args->flags)); > - return -EINVAL; > + return -1; > } > > if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) { > @@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb, > } else { > DRM_DEBUG("execbuf with unknown bsd ring: %u\n", > bsd_idx); > - return -EINVAL; > + return -1; > } > > - engine = eb->i915->engine[_VCS(bsd_idx)]; > - } else { > - engine = eb->i915->engine[user_ring_map[user_ring_id]]; > + return _VCS(bsd_idx); > } > > - if (!engine) { > - DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); > - return -EINVAL; > + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { > + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); > + return -1; > } > > - return eb_pin_context(eb, engine); > + return user_ring_map[user_ring_id]; > +} > + > +static int > +eb_select_engine(struct i915_execbuffer *eb, > + struct drm_file *file, > + struct 
drm_i915_gem_execbuffer2 *args) > +{ > + struct intel_context *ce; > + unsigned int idx; > + int err; > + > + idx = eb_select_legacy_ring(eb, file, args); > + > + ce = i915_gem_context_get_engine(eb->gem_context, idx); > + if (IS_ERR(ce)) > + return PTR_ERR(ce); > + > + err = eb_pin_context(eb, ce); > + intel_context_put(ce); > + > + return err; > } > > static void > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index afaeabe5e531..304e597e19e6 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c I leave the rest for a second pass. Regards, Tvrtko > @@ -1203,35 +1203,37 @@ static int i915_oa_read(struct i915_perf_stream *stream, > static struct intel_context *oa_pin_context(struct drm_i915_private *i915, > struct i915_gem_context *ctx) > { > - struct intel_engine_cs *engine = i915->engine[RCS0]; > - struct intel_context *ce; > + struct i915_gem_engines *e; > + unsigned int i; > int err; > > - ce = intel_context_instance(ctx, engine); > - if (IS_ERR(ce)) > - return ce; > - > err = i915_mutex_lock_interruptible(&i915->drm); > - if (err) { > - intel_context_put(ce); > + if (err) > return ERR_PTR(err); > - } > > - /* > - * As the ID is the gtt offset of the context's vma we > - * pin the vma to ensure the ID remains fixed. > - * > - * NB: implied RCS engine... > - */ > - err = intel_context_pin(ce); > + e = i915_gem_context_engine_list_lock(ctx); > + for (i = 0; i < e->num_engines; i++) { > + struct intel_context *ce = e->engines[i]; > + > + if (ce->engine->class != RENDER_CLASS) > + continue; > + > + /* > + * As the ID is the gtt offset of the context's vma we > + * pin the vma to ensure the ID remains fixed. 
> + */ > + err = intel_context_pin(ce); > + if (err == 0) { > + i915->perf.oa.pinned_ctx = ce; > + break; > + } > + } > + i915_gem_context_engine_list_unlock(ctx); > mutex_unlock(&i915->drm.struct_mutex); > - intel_context_put(ce); > if (err) > return ERR_PTR(err); > > - i915->perf.oa.pinned_ctx = ce; > - > - return ce; > + return i915->perf.oa.pinned_ctx; > } > > /** > @@ -1717,10 +1719,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, > static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > const struct i915_oa_config *oa_config) > { > - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; > unsigned int map_type = i915_coherent_map_type(dev_priv); > struct i915_gem_context *ctx; > struct i915_request *rq; > + unsigned int i; > int ret; > > lockdep_assert_held(&dev_priv->drm.struct_mutex); > @@ -1746,30 +1748,41 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > > /* Update all contexts now that we've stalled the submission. 
*/ > list_for_each_entry(ctx, &dev_priv->contexts.list, link) { > - struct intel_context *ce = intel_context_lookup(ctx, engine); > - u32 *regs; > + struct i915_gem_engines *e = > + i915_gem_context_engine_list_lock(ctx); > > - /* OA settings will be set upon first use */ > - if (!ce || !ce->state) > - continue; > + for (i = 0; i < e->num_engines; i++) { > + struct intel_context *ce = e->engines[i]; > + u32 *regs; > + > + if (!ce || ce->engine->class != RENDER_CLASS) > + continue; > + > + /* OA settings will be set upon first use */ > + if (!ce->state) > + continue; > > - regs = i915_gem_object_pin_map(ce->state->obj, map_type); > - if (IS_ERR(regs)) > - return PTR_ERR(regs); > + regs = i915_gem_object_pin_map(ce->state->obj, > + map_type); > + if (IS_ERR(regs)) > + return PTR_ERR(regs); > > - ce->state->obj->mm.dirty = true; > - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); > + ce->state->obj->mm.dirty = true; > + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); > > - gen8_update_reg_state_unlocked(ce, regs, oa_config); > + gen8_update_reg_state_unlocked(ce, regs, oa_config); > + > + i915_gem_object_unpin_map(ce->state->obj); > + } > > - i915_gem_object_unpin_map(ce->state->obj); > + i915_gem_context_engine_list_unlock(ctx); > } > > /* > * Apply the configuration by doing one context restore of the edited > * context image. > */ > - rq = i915_request_create(engine->kernel_context); > + rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); > if (IS_ERR(rq)) > return PTR_ERR(rq); > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 8abd891d9287..5e52c1623c17 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -767,7 +767,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > * GGTT space, so do this first before we reserve a seqno for > * ourselves. 
> */ > - ce = intel_context_instance(ctx, engine); > + ce = i915_gem_context_get_engine(ctx, engine->id); > if (IS_ERR(ce)) > return ERR_CAST(ce); > > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c > index 7fbfcb3d63e0..ae230b38138a 100644 > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c > @@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) > static void guc_stage_desc_init(struct intel_guc_client *client) > { > struct intel_guc *guc = client->guc; > - struct drm_i915_private *dev_priv = guc_to_i915(guc); > - struct intel_engine_cs *engine; > struct i915_gem_context *ctx = client->owner; > + struct i915_gem_engines *e; > struct guc_stage_desc *desc; > - unsigned int tmp; > + unsigned int i; > u32 gfx_addr; > > desc = __get_stage_desc(client); > @@ -382,10 +381,13 @@ static void guc_stage_desc_init(struct intel_guc_client *client) > desc->priority = client->priority; > desc->db_id = client->doorbell_id; > > - for_each_engine_masked(engine, dev_priv, client->engines, tmp) { > - struct intel_context *ce = intel_context_lookup(ctx, engine); > - u32 guc_engine_id = engine->guc_id; > - struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; > + e = i915_gem_context_engine_list_lock(ctx); > + for (i = 0; i < e->num_engines; i++) { > + struct intel_context *ce = e->engines[i]; > + struct guc_execlist_context *lrc; > + > + if (!ce || !(ce->engine->mask & client->engines)) > + continue; > > /* TODO: We have a design issue to be solved here. Only when we > * receive the first batch, we know which engine is used by the > @@ -394,7 +396,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) > * for now who owns a GuC client. But for future owner of GuC > * client, need to make sure lrc is pinned prior to enter here. > */ > - if (!ce || !ce->state) > + if (!ce->state) > break; /* XXX: continue? 
*/ > > /* > @@ -404,6 +406,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) > * Instead, the GuC uses the LRCA of the user mode context (see > * guc_add_request below). > */ > + lrc = &desc->lrc[ce->engine->guc_id]; > lrc->context_desc = lower_32_bits(ce->lrc_desc); > > /* The state page is after PPHWSP */ > @@ -414,15 +417,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client) > * here. In proxy submission, it wants the stage id > */ > lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | > - (guc_engine_id << GUC_ELC_ENGINE_OFFSET); > + (ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET); > > lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); > lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; > lrc->ring_next_free_location = lrc->ring_begin; > lrc->ring_current_tail_pointer_value = 0; > > - desc->engines_used |= (1 << guc_engine_id); > + desc->engines_used |= BIT(ce->engine->guc_id); > } > + i915_gem_context_engine_list_unlock(ctx); > > DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", > client->engines, desc->engines_used); > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > index 214d1fd2f4dc..7fd224a4ca4c 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > @@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, > > wakeref = intel_runtime_pm_get(i915); > > - ce = intel_context_instance(ctx, i915->engine[RCS0]); > + ce = i915_gem_context_get_engine(ctx, RCS0); > if (IS_ERR(ce)) { > ret = PTR_ERR(ce); > goto out_rpm; > diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c > index 0426093bf1d9..71c750693585 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_context.c > +++ b/drivers/gpu/drm/i915/selftests/mock_context.c > @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915, > const 
char *name) > { > struct i915_gem_context *ctx; > + struct i915_gem_engines *e; > int ret; > > ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); > @@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915, > INIT_LIST_HEAD(&ctx->link); > ctx->i915 = i915; > > - ctx->hw_contexts = RB_ROOT; > - spin_lock_init(&ctx->hw_contexts_lock); > + mutex_init(&ctx->engines_mutex); > + e = default_engines(ctx); > + if (IS_ERR(e)) > + goto err_free; > + RCU_INIT_POINTER(ctx->engines, e); > > INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); > INIT_LIST_HEAD(&ctx->handles_list); > @@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915, > > ret = i915_gem_context_pin_hw_id(ctx); > if (ret < 0) > - goto err_handles; > + goto err_engines; > > if (name) { > struct i915_hw_ppgtt *ppgtt; > @@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915, > > return ctx; > > -err_handles: > +err_engines: > + free_engines(rcu_access_pointer(ctx->engines)); > +err_free: > kfree(ctx); > return NULL; > >
Quoting Tvrtko Ursulin (2019-04-10 16:32:18) > > On 08/04/2019 10:17, Chris Wilson wrote: > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > index 3f794bc71958..0df3c5238c04 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > @@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx, > > struct intel_context *ce; > > int err; > > > > - ce = intel_context_instance(ctx, engine); > > + ce = i915_gem_context_get_engine(ctx, engine->id); > > Staying with intel_context_instance wouldn't help to reduce the churn? But it takes the GEM context :| intel_context_lookup() ? But it won't be part of gt/intel_context.h And I'd like to have 'get' in there for consistency (although other object lookup functions return a new reference, so that may not be a solid argument). It has annoyed me that this does require the GEM context, but that's the nature of the beast. > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index 50266e87c225..21b4a04c424b 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) > > > > static int __intel_engines_record_defaults(struct drm_i915_private *i915) > > { > > - struct i915_gem_context *ctx; > > struct intel_engine_cs *engine; > > + struct i915_gem_context *ctx; > > + struct i915_gem_engines *e; > > enum intel_engine_id id; > > int err = 0; > > > > @@ -4330,18 +4331,26 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) > > if (IS_ERR(ctx)) > > return PTR_ERR(ctx); > > > > + e = i915_gem_context_engine_list_lock(ctx); > > + > > for_each_engine(engine, i915, id) { > > Do we need i915 version of for_each_context_engine? 
If all call sites > will doing this "lock ctx" -> "walk physical engines" -> "lookup in > context" then it seems a bit disconnected. It's rare, a couple of odd cases imo. > > + struct intel_context *ce = e->engines[id]; > > How will index by engine->id work for engine map? It doesn't, that's the point of these being the odd cases. :) > > struct i915_request *rq; > > > > - rq = i915_request_alloc(engine, ctx); > > + err = intel_context_pin(ce); > > + if (err) > > + goto err_active; > > + > > + rq = i915_request_create(ce); > > + intel_context_unpin(ce); > > Kind of verbose, no? Do you want to add some > middle-ground-between-request-alloc-and-create helper? There's 2 callers of i915_request_create() in this style. (Execbuf has a style all of its own.) At the moment I don't have a good name for the happy middle ground, so I'm willing to pay the price for forcing control over the pin to the user. ... Does it really make any difference for the perma-pinned kernel contexts? Actually no... Hmm. The fundamental objective was being able to pass ce and avoid struct_mutex -- but we already avoid struct_mutex for pinning today as the context is already pinned. The downside is that it adds redundant steps to execbuf, and __i915_request_create() is already taken... And I hope you would say no if I suggest ____i915_request_create :) > > +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) > > +{ > > + struct intel_engine_cs *engine; > > + struct i915_gem_engines *e; > > + enum intel_engine_id id; > > + > > + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); > > + if (!e) > > + return ERR_PTR(-ENOMEM); > > + > > + e->i915 = ctx->i915; > > + for_each_engine(engine, ctx->i915, id) { > > + struct intel_context *ce; > > > > + ce = intel_context_create(ctx, engine); > > + if (IS_ERR(ce)) { > > + free_engines_n(e, id); > > I dislike piggy-back of n into e. How about: > > __free_engines(e, n) > { > ... 
> } > > free_engines(e) > { > __free_engines(e, e->num_engines); > } > > ? Ok. > Or even you could e->num_engines++ in the above loop and just have one > free_engines. I thought it was cleaner to avoid having multiple counters for the same loop. free_engines_n() ended up with 5 users. > > + return ERR_CAST(ce); > > + } > > + > > + e->engines[id] = ce; > Each context would have a sparse array of engines, on most platforms. > Would it be workable to instead create a compact array per context, and > just have a per device translation table of idx to engine->id? Or > vice-versa, I can't figure out straight from the bat which one would you > need. As sparse as we do today. I'm working under the assumption that going forwards, the default map would be the oddity. But that is better than the sparse fixed array[] we previously embedded into the GEM context, so overall I think still an improvement for old platforms. We can trim the num_engines, i.e. we need only allocate [rcs0] on gen3 as any id>rcs0 will become -EINVAL. One plan I did have was to remove the sparse engine->id (since that should be an internal id), but then we end up with not being able to use static constants for our VCSx etc. > I guess in this scheme you would need some translation as well to > support customised engine maps.. will see if I will spot that later in > the patch. In the scheme in this patch, we just replace ctx->engines[] for the custom map with separate intel_contexts for each instance of a specific engine. > > - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { > > - struct intel_engine_cs *engine = ce->engine; > > + e = i915_gem_context_engine_list_lock(ctx); > > + for (i = 0; i < e->num_engines; i++) { > > + struct intel_context *ce = e->engines[i]; > > struct i915_request *rq; > > > > - if (!(engine->mask & engines)) > > + if (!ce || !(ce->engine->mask & engines)) > > Definitely looks like a case for for_each_context_engine.
for_each_context_engine(ce, e, i) for_each_context_masked(ce, e, mask, i) Hmm. I do regret for_each...() that require temporaries. We can't stuff the lock in there, without a bit of work... gem_for_each_context_engine(iter, ctx) { struct intel_context *ce = iter.context; with for (init_iter(&iter, ctx);; next_iter(&iter)) Just to hide the locking. Probably worth it. > > + continue; > > + > > + if (!intel_context_is_pinned(ce)) > > continue; > > > > if (I915_SELFTEST_ONLY(context_barrier_inject_fault & > > - engine->mask)) { > > + ce->engine->mask)) { > > err = -ENXIO; > > break; > > } > > > > - rq = i915_request_alloc(engine, ctx); > > + err = intel_context_pin(ce); > > + if (err) > > + break; > > + > > + rq = i915_request_create(ce); > > + intel_context_unpin(ce); > > Yep as mentioned somewhere above. I definitely think another helper > would help code base readability. Even if called unimaginatively as > __i915_request_add. It is just these two (based on a quick grep). Maybe intel_context_create_request() ? Hmm, but isn't that what i915_request_create() is. Quiet!
On 10/04/2019 17:18, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2019-04-10 16:32:18) >> >> On 08/04/2019 10:17, Chris Wilson wrote: >>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >>> index 3f794bc71958..0df3c5238c04 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >>> @@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx, >>> struct intel_context *ce; >>> int err; >>> >>> - ce = intel_context_instance(ctx, engine); >>> + ce = i915_gem_context_get_engine(ctx, engine->id); >> >> Staying with intel_context_instance wouldn't help to reduce the churn? > > But it takes the GEM context :| > > intel_context_lookup() ? But it won't be part of gt/intel_context.h > And I'd like to have 'get' in there for consistency (although other > object lookup functions return a new reference, so that may not be a > solid argument). > > It has annoyed me that this does require the GEM context, but that's the > nature of the beast. Leave it as is then. >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >>> index 50266e87c225..21b4a04c424b 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem.c >>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>> @@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) >>> >>> static int __intel_engines_record_defaults(struct drm_i915_private *i915) >>> { >>> - struct i915_gem_context *ctx; >>> struct intel_engine_cs *engine; >>> + struct i915_gem_context *ctx; >>> + struct i915_gem_engines *e; >>> enum intel_engine_id id; >>> int err = 0; >>> >>> @@ -4330,18 +4331,26 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) >>> if (IS_ERR(ctx)) >>> return PTR_ERR(ctx); >>> >>> + e = i915_gem_context_engine_list_lock(ctx); >>> + >>> for_each_engine(engine, i915, id) { >> >> Do we need i915 version of for_each_context_engine? 
If all call sites >> will doing this "lock ctx" -> "walk physical engines" -> "lookup in >> context" then it seems a bit disconnected. > > It's rare, a couple of odd cases imo. Ok, can re-evaluate at the end. >>> + struct intel_context *ce = e->engines[id]; >> >> How will index by engine->id work for engine map? > > It doesn't, that's the point of these being the odd cases. :) > >>> struct i915_request *rq; >>> >>> - rq = i915_request_alloc(engine, ctx); >>> + err = intel_context_pin(ce); >>> + if (err) >>> + goto err_active; >>> + >>> + rq = i915_request_create(ce); >>> + intel_context_unpin(ce); >> >> Kind of verbose, no? Do you want to add some >> middle-ground-between-request-alloc-and-create helper? > > There's 2 callers of i915_request_create() in this style. > (Execbuf has a style all of its own.) > > At the moment I don't have a good name for the happy middle ground, so > I'm willing to pay the price for forcing control over the pin to the > user. > > ... > > Does it really make any difference for the perma-pinned kernel contexts? > Actually no... > > Hmm. The fundamental objective was being able to pass ce and avoid > struct_mutex -- but we already avoid struct_mutex for pinning today as > the context is already pinned. > > The downside is that it adds redundant steps to execbuf, and > __i915_request_create() is already taken... 
And I hope you would say no > if I suggest ____i915_request_create :) > >>> +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) >>> +{ >>> + struct intel_engine_cs *engine; >>> + struct i915_gem_engines *e; >>> + enum intel_engine_id id; >>> + >>> + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); >>> + if (!e) >>> + return ERR_PTR(-ENOMEM); >>> + >>> + e->i915 = ctx->i915; >>> + for_each_engine(engine, ctx->i915, id) { >>> + struct intel_context *ce; >>> >>> + ce = intel_context_create(ctx, engine); >>> + if (IS_ERR(ce)) { >>> + free_engines_n(e, id); >> >> I dislike piggy-back of n into e. How about: >> >> __free_engines(e, n) >> { >> ... >> } >> >> free_engines(e) >> { >> __fre_engines(e, e->num_engines): >> } >> >> ? > > Ok. > >> Or even you could e->num_engines++ in the above loop and just have one >> free_engines. > > I thought it was cleaner to avoid having multiple counters for the same > loop. free_engines_n() ended up with 5 users. > >>> + return ERR_CAST(ce); >>> + } >>> + >>> + e->engines[id] = ce; >> >> Each context would have a sparse array of engines, on most platforms. >> Would it be workable to instead create a compact array per context, and >> just have a per device translation table of idx to engine->id? Or >> vice-versa, I can't figure out straight from the bat which one would you >> need. > > As sparse as we do today. I working under the assumption that going > forwards, the default map would be the oddity. But that is better than > the sparse fixed array[] we previously embedded into the GEM context, so > overall I think still an improvement for old platforms. True. > We can trim the num_engines, i.e. we need only allocate [rcs0] on gen3 > as any id>rcs0 will become -EINVAL. > > One plan I did have was to remove the sparse engine->id (since that > should be an internal id), but then we end up with not being able to use > static constants for our VCSx etc. Best leave that for now. 
>> I guess in this scheme you would need some translation as well to >> support customised engine maps.. will see if I will spot that later in >> the patch. > > In the scheme in this patch, we just replace ctx->engines[] for the > custom map with separate intel_contexts for each instance of a specific > engine. > >>> - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { >>> - struct intel_engine_cs *engine = ce->engine; >>> + e = i915_gem_context_engine_list_lock(ctx); >>> + for (i = 0; i < e->num_engines; i++) { >>> + struct intel_context *ce = e->engines[i]; >>> struct i915_request *rq; >>> >>> - if (!(engine->mask & engines)) >>> + if (!ce || !(ce->engine->mask & engines)) >> >> Definitely looks like a case for for_each_context_engine. > > for_each_context_engine(ce, e, i) > for_each_context_masked(ce, e, mask, i) > > Hmm. I do regret for_each...() that require temporaries. > > We can't stuff the lock in there, without a bit of work... > > gem_for_each_context_engine(iter, ctx) { > struct intel_context *ce = iter.context; > > with > for (init_iter(&iter, ctx);; next_iter(&iter)) > > Just to hide the locking. Probably worth it. Don't go too wild! :) We got used to temporaries so could also clean all of them later if too bored. >>> + continue; >>> + >>> + if (!intel_context_is_pinned(ce)) >>> continue; >>> >>> if (I915_SELFTEST_ONLY(context_barrier_inject_fault & >>> - engine->mask)) { >>> + ce->engine->mask)) { >>> err = -ENXIO; >>> break; >>> } >>> >>> - rq = i915_request_alloc(engine, ctx); >>> + err = intel_context_pin(ce); >>> + if (err) >>> + break; >>> + >>> + rq = i915_request_create(ce); >>> + intel_context_unpin(ce); >> >> Yep as mentioned somewhere above. I definitely think another helper >> would help code base readability. Even if called unimaginatively as >> __i915_request_add. > > It is just these two (based on a quick grep). > > Maybe intel_context_create_request() ? > > Hmm, but isn't that what i915_request_create() is. Quiet!
Why not __i915_request_add? i915_request_add would then be just wedged check calling __i915_request_add, no? (Going from memory.. might not be reliable.) >>> +struct i915_gem_engines { >>> + struct rcu_work rcu; >>> + struct drm_i915_private *i915; >>> + unsigned long num_engines; >> >> unsigned int? > > Pointer before, array of pointers after, left an unsigned long hole. > > I was just filling the space. Well long makes me think there's a reason int is not enough. Which in this case there isn't. So I would still go for an int regardless of the hole or not. There is nothing to be gained by filling space. Could even be worse if some instructions expand to longer opcodes. :) Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-04-11 14:05:19) > > On 10/04/2019 17:18, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2019-04-10 16:32:18) > >> Yep as mentioned somewhere above. I definitely think another helper > >> would help code base redability. Even if called unimaginatively as > >> __i915_request_add. > > > > It is just these two (based on a quick grep). > > > > Maybe intel_context_create_request() ? > > > > Hmm, but isn't that what i915_request_create() is. Quiet! > > Why not __i915_request_add? i915_request_add would then be just wedged > check calling __i915_request_add, no? (Going from memory.. might not be > reliable.) At the moment, we have: __i915_request_create(): no timeline locking, no runtime-pm i915_request_create(): no pinning, does timeline + runtime-pm intel_context_create_request(): pin + call i915_request_create igt_request_alloc(): lookup intel_context + call intel_context_create_request I suppose there is some room in bringing i915_request_alloc() back from the dead, but it's not the simple allocator one would expect from the name. (igt_request_alloc is just for ease of recognition.) Outside of igt_request_alloc(), there are two users for intel_context_create_request() in my tree. One in __intel_engine_record_defaults() and the other in context_barrier_task(). For the in-kernel blitter tasks, we'll either be using a pinned kernel context, or be shoehorning it into a pinned user context. Both should be using i915_request_create() (as currently planned). And things like the heartbeat and idle barriers, use the pinned kernel context. Just to say intel_context_create_request() is the odd one out, and deserves the longest name :) > >>> +struct i915_gem_engines { > >>> + struct rcu_work rcu; > >>> + struct drm_i915_private *i915; > >>> + unsigned long num_engines; > >> > >> unsigned int? > > > > Pointer before, array of pointers after, left an unsigned long hole. > > > > I was just filling the space. 
> > Well long makes me think there's a reason int is not enough. Which in > this case there isn't. And I thought you were planning for a busy future, full of little engines :) > So I would still go for an int regardless of the > hole or not. There is nothing to be gained by filling space. Could even > be worse if some instructions expand to longer opcodes. :) Hmm, in the near future this becomes struct i915_gem_engines { struct rcu_head rcu; unsigned long num_engines; struct intel_context *engines[]; }; struct rcu_head is a pair of pointers, so that's still a pointer sized hole. I give in. We'll only support 4 billion engines. -Chris
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 46bf8d8fb7e4..803ac9a1dd15 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -17,7 +17,7 @@ static struct i915_global_context { struct kmem_cache *slab_ce; } global; -struct intel_context *intel_context_alloc(void) +static struct intel_context *intel_context_alloc(void) { return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); } @@ -28,104 +28,17 @@ void intel_context_free(struct intel_context *ce) } struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, +intel_context_create(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = NULL; - struct rb_node *p; - - spin_lock(&ctx->hw_contexts_lock); - p = ctx->hw_contexts.rb_node; - while (p) { - struct intel_context *this = - rb_entry(p, struct intel_context, node); - - if (this->engine == engine) { - GEM_BUG_ON(this->gem_context != ctx); - ce = this; - break; - } - - if (this->engine < engine) - p = p->rb_right; - else - p = p->rb_left; - } - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce) -{ - struct rb_node **p, *parent; - int err = 0; - - spin_lock(&ctx->hw_contexts_lock); - - parent = NULL; - p = &ctx->hw_contexts.rb_node; - while (*p) { - struct intel_context *this; - - parent = *p; - this = rb_entry(parent, struct intel_context, node); - - if (this->engine == engine) { - err = -EEXIST; - ce = this; - break; - } - - if (this->engine < engine) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - if (!err) { - rb_link_node(&ce->node, parent, p); - rb_insert_color(&ce->node, &ctx->hw_contexts); - } - - spin_unlock(&ctx->hw_contexts_lock); - - return ce; -} - -void __intel_context_remove(struct intel_context *ce) -{ - struct i915_gem_context *ctx = 
ce->gem_context; - - spin_lock(&ctx->hw_contexts_lock); - rb_erase(&ce->node, &ctx->hw_contexts); - spin_unlock(&ctx->hw_contexts_lock); -} - -struct intel_context * -intel_context_instance(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct intel_context *ce, *pos; - - ce = intel_context_lookup(ctx, engine); - if (likely(ce)) - return intel_context_get(ce); + struct intel_context *ce; ce = intel_context_alloc(); if (!ce) return ERR_PTR(-ENOMEM); intel_context_init(ce, ctx, engine); - - pos = __intel_context_insert(ctx, engine, ce); - if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */ - intel_context_free(ce); - - GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos); - return intel_context_get(pos); + return ce; } int __intel_context_do_pin(struct intel_context *ce) @@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + GEM_BUG_ON(!engine->cops); + kref_init(&ce->ref); ce->gem_context = ctx; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 4f8ef45e6344..567a415be513 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,24 +12,16 @@ #include "intel_context_types.h" #include "intel_engine_types.h" -struct intel_context *intel_context_alloc(void); -void intel_context_free(struct intel_context *ce); - void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); -/** - * intel_context_lookup - Find the matching HW context for this (ctx, engine) - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * May return NULL if the HW context hasn't been instantiated (i.e. unused). 
- */ struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, +intel_context_create(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +void intel_context_free(struct intel_context *ce); + /** * intel_context_pin_lock - Stablises the 'pinned' status of the HW context * @ctx - the parent GEM context @@ -59,17 +51,6 @@ intel_context_is_pinned(struct intel_context *ce) void intel_context_pin_unlock(struct intel_context *ce); -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce); -void -__intel_context_remove(struct intel_context *ce); - -struct intel_context * -intel_context_instance(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - int __intel_context_do_pin(struct intel_context *ce); static inline int intel_context_pin(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index f02d27734e3b..3579c2708321 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -10,7 +10,6 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/mutex.h> -#include <linux/rbtree.h> #include <linux/types.h> #include "i915_active_types.h" @@ -61,7 +60,6 @@ struct intel_context { struct i915_active_request active_tracker; const struct intel_context_ops *ops; - struct rb_node node; /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3f794bc71958..0df3c5238c04 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx, struct intel_context *ce; int err; - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, engine->id); if (IS_ERR(ce)) return PTR_ERR(ce); 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 3b672e011cf0..87e538825fee 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,6 +23,7 @@ */ #include "i915_drv.h" +#include "i915_gem_context.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); engine->kernel_context = - intel_context_instance(i915->kernel_context, engine); + i915_gem_context_get_engine(i915->kernel_context, engine->id); if (IS_ERR(engine->kernel_context)) goto err_timeline; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 606fc2713240..8b6574e1b495 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1188,7 +1188,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&s->workload_q_head[i]); s->shadow[i] = ERR_PTR(-EINVAL); - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, i); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_shadow_ctx; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 50266e87c225..21b4a04c424b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) static int __intel_engines_record_defaults(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct i915_gem_engines *e; enum intel_engine_id id; int err = 0; @@ -4330,18 +4331,26 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (IS_ERR(ctx)) return PTR_ERR(ctx); + e = i915_gem_context_engine_list_lock(ctx); + for_each_engine(engine, i915, id) { + struct intel_context *ce = e->engines[id]; struct i915_request 
*rq; - rq = i915_request_alloc(engine, ctx); + err = intel_context_pin(ce); + if (err) + goto err_active; + + rq = i915_request_create(ce); + intel_context_unpin(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_ctx; + goto err_active; } err = 0; - if (engine->init_context) - err = engine->init_context(rq); + if (rq->engine->init_context) + err = rq->engine->init_context(rq); i915_request_add(rq); if (err) @@ -4355,15 +4364,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } for_each_engine(engine, i915, id) { - struct intel_context *ce; - struct i915_vma *state; + struct intel_context *ce = e->engines[id]; + struct i915_vma *state = ce->state; void *vaddr; - ce = intel_context_lookup(ctx, engine); - if (!ce) - continue; - - state = ce->state; if (!state) continue; @@ -4419,6 +4423,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } out_ctx: + i915_gem_context_engine_list_unlock(ctx); i915_gem_context_set_closed(ctx); i915_gem_context_put(ctx); return err; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 1e84fe0a4c55..517816558c85 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance) if (!engine) return ERR_PTR(-EINVAL); - return intel_context_instance(ctx, engine); + return i915_gem_context_get_engine(ctx, engine->id); } static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) @@ -242,10 +242,54 @@ static void release_hw_id(struct i915_gem_context *ctx) mutex_unlock(&i915->contexts.mutex); } -static void i915_gem_context_free(struct i915_gem_context *ctx) +static void free_engines(struct i915_gem_engines *e) +{ + unsigned int n; + + for (n = 0; n < e->num_engines; n++) { + if (!e->engines[n]) + continue; + + intel_context_put(e->engines[n]); + } + kfree(e); +} + +static void free_engines_n(struct 
i915_gem_engines *e, unsigned long n) { - struct intel_context *it, *n; + e->num_engines = n; + free_engines(e); +} + +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +{ + struct intel_engine_cs *engine; + struct i915_gem_engines *e; + enum intel_engine_id id; + + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + e->i915 = ctx->i915; + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce; + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + free_engines_n(e, id); + return ERR_CAST(ce); + } + + e->engines[id] = ce; + } + e->num_engines = id; + + return e; +} + +static void i915_gem_context_free(struct i915_gem_context *ctx) +{ lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); GEM_BUG_ON(!list_empty(&ctx->active_engines)); @@ -253,8 +297,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) release_hw_id(ctx); i915_ppgtt_put(ctx->ppgtt); - rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node) - intel_context_put(it); + free_engines(rcu_access_pointer(ctx->engines)); + mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) i915_timeline_put(ctx->timeline); @@ -363,6 +407,8 @@ static struct i915_gem_context * __create_context(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int err; int i; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -376,8 +422,13 @@ __create_context(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&ctx->active_engines); mutex_init(&ctx->mutex); - ctx->hw_contexts = RB_ROOT; - spin_lock_init(&ctx->hw_contexts_lock); + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_free; + } + RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -399,6 +450,10 @@ __create_context(struct 
drm_i915_private *dev_priv) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; return ctx; + +err_free: + kfree(ctx); + return ERR_PTR(err); } static struct i915_hw_ppgtt * @@ -862,7 +917,8 @@ static int context_barrier_task(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; - struct intel_context *ce, *next; + struct i915_gem_engines *e; + unsigned int i; int err = 0; lockdep_assert_held(&i915->drm.struct_mutex); @@ -875,20 +931,29 @@ static int context_barrier_task(struct i915_gem_context *ctx, i915_active_init(i915, &cb->base, cb_retire); i915_active_acquire(&cb->base); - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { - struct intel_engine_cs *engine = ce->engine; + e = i915_gem_context_engine_list_lock(ctx); + for (i = 0; i < e->num_engines; i++) { + struct intel_context *ce = e->engines[i]; struct i915_request *rq; - if (!(engine->mask & engines)) + if (!ce || !(ce->engine->mask & engines)) + continue; + + if (!intel_context_is_pinned(ce)) continue; if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - engine->mask)) { + ce->engine->mask)) { err = -ENXIO; break; } - rq = i915_request_alloc(engine, ctx); + err = intel_context_pin(ce); + if (err) + break; + + rq = i915_request_create(ce); + intel_context_unpin(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -904,6 +969,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } + i915_gem_context_engine_list_unlock(ctx); cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ cb->data = data; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 5a8e080499fb..d94fc4c1907c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -176,6 +176,48 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_gem_engines * +i915_gem_context_engine_list(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->engines, + lockdep_is_held(&ctx->engines_mutex)); +} + +static inline struct i915_gem_engines * +i915_gem_context_engine_list_lock(struct i915_gem_context *ctx) + __acquires(&ctx->engines_mutex) +{ + mutex_lock(&ctx->engines_mutex); + return i915_gem_context_engine_list(ctx); +} + +static inline void +i915_gem_context_engine_list_unlock(struct i915_gem_context *ctx) + __releases(&ctx->engines_mutex) +{ + mutex_unlock(&ctx->engines_mutex); +} + +static inline struct intel_context * +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned long idx) +{ + return i915_gem_context_engine_list(ctx)->engines[idx]; +} + +static inline struct intel_context * +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned long idx) +{ + struct intel_context *ce = ERR_PTR(-EINVAL); + + rcu_read_lock(); { + struct i915_gem_engines *e = rcu_dereference(ctx->engines); + if (likely(idx < e->num_engines && e->engines[idx])) + ce = intel_context_get(e->engines[idx]); + } rcu_read_unlock(); + + return ce; +} + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index d282a6ab3b9f..1610a74b0283 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -29,6 +29,13 @@ struct 
i915_hw_ppgtt; struct i915_timeline; struct intel_ring; +struct i915_gem_engines { + struct rcu_work rcu; + struct drm_i915_private *i915; + unsigned long num_engines; + struct intel_context *engines[]; +}; + /** * struct i915_gem_context - client state * @@ -42,6 +49,21 @@ struct i915_gem_context { /** file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; + /** + * @engines: User defined engines for this context + * + * NULL means to use legacy definitions (including random meaning of + * I915_EXEC_BSD with I915_EXEC_BSD_SELECTOR overrides). + * + * If defined, execbuf uses I915_EXEC_RING_MASK as an index into the + * array, and various other uAPIs use the ability to look up an + * index from this array to select an engine to operate on. + * + * User defined by I915_CONTEXT_PARAM_ENGINE. + */ + struct i915_gem_engines __rcu *engines; + struct mutex engines_mutex; /* guards writes to engines */ + struct i915_timeline *timeline; /** @@ -134,10 +156,6 @@ struct i915_gem_context { struct i915_sched_attr sched; - /** hw_contexts: per-engine logical HW state */ - struct rb_root hw_contexts; - spinlock_t hw_contexts_lock; - /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; /** desc_template: invariant fields for the HW context descriptor */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c700cbc2f594..941192c60832 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2077,9 +2077,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, return file_priv->bsd_engine; } -#define I915_USER_RINGS (4) - -static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { +static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_DEFAULT] = RCS0, [I915_EXEC_RENDER] = RCS0, [I915_EXEC_BLT] = BCS0, @@ -2087,10 +2085,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
[I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, - struct intel_engine_cs *engine) +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) { - struct intel_context *ce; int err; /* @@ -2101,21 +2097,16 @@ static int eb_pin_context(struct i915_execbuffer *eb, if (err) return err; - ce = intel_context_instance(eb->gem_context, engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. */ err = intel_context_pin(ce); - intel_context_put(ce); if (err) return err; - eb->engine = engine; + eb->engine = ce->engine; eb->context = ce; return 0; } @@ -2125,24 +2116,18 @@ static void eb_unpin_context(struct i915_execbuffer *eb) intel_context_unpin(eb->context); } -static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +static unsigned int +eb_select_legacy_ring(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; - struct intel_engine_cs *engine; - - if (user_ring_id > I915_USER_RINGS) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; - } - if ((user_ring_id != I915_EXEC_BSD) && - ((args->flags & I915_EXEC_BSD_MASK) != 0)) { + if (user_ring_id != I915_EXEC_BSD && + (args->flags & I915_EXEC_BSD_MASK)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return -1; } if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) { @@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return -1; } - engine = eb->i915->engine[_VCS(bsd_idx)]; - } else { - engine = eb->i915->engine[user_ring_map[user_ring_id]]; + 
return _VCS(bsd_idx); } - if (!engine) { - DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + return -1; } - return eb_pin_context(eb, engine); + return user_ring_map[user_ring_id]; +} + +static int +eb_select_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct intel_context *ce; + unsigned int idx; + int err; + + idx = eb_select_legacy_ring(eb, file, args); + + ce = i915_gem_context_get_engine(eb->gem_context, idx); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = eb_pin_context(eb, ce); + intel_context_put(ce); + + return err; } static void diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index afaeabe5e531..304e597e19e6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1203,35 +1203,37 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) { - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_context *ce; + struct i915_gem_engines *e; + unsigned int i; int err; - ce = intel_context_instance(ctx, engine); - if (IS_ERR(ce)) - return ce; - err = i915_mutex_lock_interruptible(&i915->drm); - if (err) { - intel_context_put(ce); + if (err) return ERR_PTR(err); - } - /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... - */ - err = intel_context_pin(ce); + e = i915_gem_context_engine_list_lock(ctx); + for (i = 0; i < e->num_engines; i++) { + struct intel_context *ce = e->engines[i]; + + if (ce->engine->class != RENDER_CLASS) + continue; + + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. 
+ */ + err = intel_context_pin(ce); + if (err == 0) { + i915->perf.oa.pinned_ctx = ce; + break; + } + } + i915_gem_context_engine_list_unlock(ctx); mutex_unlock(&i915->drm.struct_mutex); - intel_context_put(ce); if (err) return ERR_PTR(err); - i915->perf.oa.pinned_ctx = ce; - - return ce; + return i915->perf.oa.pinned_ctx; } /** @@ -1717,10 +1719,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; struct i915_request *rq; + unsigned int i; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -1746,30 +1748,41 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = intel_context_lookup(ctx, engine); - u32 *regs; + struct i915_gem_engines *e = + i915_gem_context_engine_list_lock(ctx); - /* OA settings will be set upon first use */ - if (!ce || !ce->state) - continue; + for (i = 0; i < e->num_engines; i++) { + struct intel_context *ce = e->engines[i]; + u32 *regs; + + if (!ce || ce->engine->class != RENDER_CLASS) + continue; + + /* OA settings will be set upon first use */ + if (!ce->state) + continue; - regs = i915_gem_object_pin_map(ce->state->obj, map_type); - if (IS_ERR(regs)) - return PTR_ERR(regs); + regs = i915_gem_object_pin_map(ce->state->obj, + map_type); + if (IS_ERR(regs)) + return PTR_ERR(regs); - ce->state->obj->mm.dirty = true; - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + ce->state->obj->mm.dirty = true; + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); - gen8_update_reg_state_unlocked(ce, regs, oa_config); + gen8_update_reg_state_unlocked(ce, regs, oa_config); + + 
i915_gem_object_unpin_map(ce->state->obj); + } - i915_gem_object_unpin_map(ce->state->obj); + i915_gem_context_engine_list_unlock(ctx); } /* * Apply the configuration by doing one context restore of the edited * context image. */ - rq = i915_request_create(engine->kernel_context); + rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8abd891d9287..5e52c1623c17 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -767,7 +767,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ce = intel_context_instance(ctx, engine); + ce = i915_gem_context_get_engine(ctx, engine->id); if (IS_ERR(ce)) return ERR_CAST(ce); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 7fbfcb3d63e0..ae230b38138a 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc) static void guc_stage_desc_init(struct intel_guc_client *client) { struct intel_guc *guc = client->guc; - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; + struct i915_gem_engines *e; struct guc_stage_desc *desc; - unsigned int tmp; + unsigned int i; u32 gfx_addr; desc = __get_stage_desc(client); @@ -382,10 +381,13 @@ static void guc_stage_desc_init(struct intel_guc_client *client) desc->priority = client->priority; desc->db_id = client->doorbell_id; - for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = intel_context_lookup(ctx, engine); - u32 guc_engine_id = engine->guc_id; - struct guc_execlist_context *lrc = 
&desc->lrc[guc_engine_id]; + e = i915_gem_context_engine_list_lock(ctx); + for (i = 0; i < e->num_engines; i++) { + struct intel_context *ce = e->engines[i]; + struct guc_execlist_context *lrc; + + if (!ce || !(ce->engine->mask & client->engines)) + continue; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -394,7 +396,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * for now who owns a GuC client. But for future owner of GuC * client, need to make sure lrc is pinned prior to enter here. */ - if (!ce || !ce->state) + if (!ce->state) break; /* XXX: continue? */ /* @@ -404,6 +406,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * Instead, the GuC uses the LRCA of the user mode context (see * guc_add_request below). */ + lrc = &desc->lrc[ce->engine->guc_id]; lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ @@ -414,15 +417,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * here. 
In proxy submission, it wants the stage id */ lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | - (guc_engine_id << GUC_ELC_ENGINE_OFFSET); + (ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET); lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc->engines_used |= (1 << guc_engine_id); + desc->engines_used |= BIT(ce->engine->guc_id); } + i915_gem_context_engine_list_unlock(ctx); DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", client->engines, desc->engines_used); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 214d1fd2f4dc..7fd224a4ca4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(i915); - ce = intel_context_instance(ctx, i915->engine[RCS0]); + ce = i915_gem_context_get_engine(ctx, RCS0); if (IS_ERR(ce)) { ret = PTR_ERR(ce); goto out_rpm; diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 0426093bf1d9..71c750693585 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915, const char *name) { struct i915_gem_context *ctx; + struct i915_gem_engines *e; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; - ctx->hw_contexts = RB_ROOT; - spin_lock_init(&ctx->hw_contexts_lock); + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) + goto err_free; + RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); 
INIT_LIST_HEAD(&ctx->handles_list); @@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915, ret = i915_gem_context_pin_hw_id(ctx); if (ret < 0) - goto err_handles; + goto err_engines; if (name) { struct i915_hw_ppgtt *ppgtt; @@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915, return ctx; -err_handles: +err_engines: + free_engines(rcu_access_pointer(ctx->engines)); +err_free: kfree(ctx); return NULL;
We switched to a tree of per-engine HW context to accommodate the introduction of virtual engines. However, we plan to also support multiple instances of the same engine within the GEM context, defeating our use of the engine as a key to looking up the HW context. Just allocate a logical per-engine instance and always use an index into the ctx->engines[]. Later on, this ctx->engines[] may be replaced by a user specified map. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gt/intel_context.c | 97 ++----------------- drivers/gpu/drm/i915/gt/intel_context.h | 25 +---- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 - drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 3 +- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 29 +++--- drivers/gpu/drm/i915/i915_gem_context.c | 92 +++++++++++++++--- drivers/gpu/drm/i915/i915_gem_context.h | 42 ++++++++ drivers/gpu/drm/i915/i915_gem_context_types.h | 26 ++++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 70 ++++++------- drivers/gpu/drm/i915/i915_perf.c | 81 +++++++++------- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 24 +++-- .../gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/mock_context.c | 14 ++- 16 files changed, 283 insertions(+), 230 deletions(-)