Message ID | 20210716201724.54804-26-matthew.brost@intel.com (mailing list archive)
---|---
State | New, archived
Series | GuC submission support
On 7/16/2021 13:16, Matthew Brost wrote:
> Move active request tracking to a backend vfunc rather than assuming all
> backends want to do this in the maner. In the case execlists /

maner -> manner. In the case *of* execlists

With those fixed...

Reviewed-by: John Harrison <John.C.Harrison@Intel.com>

> ring submission the tracking is on the physical engine while with GuC
> submission it is on the context.
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_context.c       |  3 ++
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  7 ++++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +++
>  .../drm/i915/gt/intel_execlists_submission.c  | 40 ++++++++++++++++++
>  .../gpu/drm/i915/gt/intel_ring_submission.c   | 22 ++++++++++
>  drivers/gpu/drm/i915/gt/mock_engine.c         | 30 ++++++++++++++
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 33 +++++++++++++++
>  drivers/gpu/drm/i915/i915_request.c           | 41 ++-----------------
>  drivers/gpu/drm/i915/i915_request.h           |  2 +
>  9 files changed, 147 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 251ff7eea22d..bfb05d8697d1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -393,6 +393,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>          spin_lock_init(&ce->guc_state.lock);
>          INIT_LIST_HEAD(&ce->guc_state.fences);
>
> +        spin_lock_init(&ce->guc_active.lock);
> +        INIT_LIST_HEAD(&ce->guc_active.requests);
> +
>          ce->guc_id = GUC_INVALID_LRC_ID;
>          INIT_LIST_HEAD(&ce->guc_id_link);
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 542c98418771..035108c10b2c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -162,6 +162,13 @@ struct intel_context {
>                  struct list_head fences;
>          } guc_state;
>
> +        struct {
> +                /** lock: protects everything in guc_active */
> +                spinlock_t lock;
> +                /** requests: active requests on this context */
> +                struct list_head requests;
> +        } guc_active;
> +
>          /* GuC scheduling state flags that do not require a lock. */
>          atomic_t guc_sched_state_no_lock;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 03a81e8d87f4..950fc73ed6af 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -420,6 +420,12 @@ struct intel_engine_cs {
>
>          void (*release)(struct intel_engine_cs *engine);
>
> +        /*
> +         * Add / remove request from engine active tracking
> +         */
> +        void (*add_active_request)(struct i915_request *rq);
> +        void (*remove_active_request)(struct i915_request *rq);
> +
>          struct intel_engine_execlists execlists;
>
>          /*
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index abe48421fd7a..f9b5f54a5abe 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3106,6 +3106,42 @@ static void execlists_park(struct intel_engine_cs *engine)
>          cancel_timer(&engine->execlists.preempt);
>  }
>
> +static void add_to_engine(struct i915_request *rq)
> +{
> +        lockdep_assert_held(&rq->engine->sched_engine->lock);
> +        list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
> +}
> +
> +static void remove_from_engine(struct i915_request *rq)
> +{
> +        struct intel_engine_cs *engine, *locked;
> +
> +        /*
> +         * Virtual engines complicate acquiring the engine timeline lock,
> +         * as their rq->engine pointer is not stable until under that
> +         * engine lock. The simple ploy we use is to take the lock then
> +         * check that the rq still belongs to the newly locked engine.
> +         */
> +        locked = READ_ONCE(rq->engine);
> +        spin_lock_irq(&locked->sched_engine->lock);
> +        while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
> +                spin_unlock(&locked->sched_engine->lock);
> +                spin_lock(&engine->sched_engine->lock);
> +                locked = engine;
> +        }
> +        list_del_init(&rq->sched.link);
> +
> +        clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +        clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +
> +        /* Prevent further __await_execution() registering a cb, then flush */
> +        set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> +
> +        spin_unlock_irq(&locked->sched_engine->lock);
> +
> +        i915_request_notify_execute_cb_imm(rq);
> +}
> +
>  static bool can_preempt(struct intel_engine_cs *engine)
>  {
>          if (GRAPHICS_VER(engine->i915) > 8)
> @@ -3206,6 +3242,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>          engine->cops = &execlists_context_ops;
>          engine->request_alloc = execlists_request_alloc;
>          engine->bump_serial = execlist_bump_serial;
> +        engine->add_active_request = add_to_engine;
> +        engine->remove_active_request = remove_from_engine;
>
>          engine->reset.prepare = execlists_reset_prepare;
>          engine->reset.rewind = execlists_reset_rewind;
> @@ -3797,6 +3835,8 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>                   "v%dx%d", ve->base.class, count);
>          ve->base.context_size = sibling->context_size;
>
> +        ve->base.add_active_request = sibling->add_active_request;
> +        ve->base.remove_active_request = sibling->remove_active_request;
>          ve->base.emit_bb_start = sibling->emit_bb_start;
>          ve->base.emit_flush = sibling->emit_flush;
>          ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 61469c631057..3b1471c50d40 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -1052,6 +1052,25 @@ static void ring_bump_serial(struct intel_engine_cs *engine)
>          engine->serial++;
>  }
>
> +static void add_to_engine(struct i915_request *rq)
> +{
> +        lockdep_assert_held(&rq->engine->sched_engine->lock);
> +        list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
> +}
> +
> +static void remove_from_engine(struct i915_request *rq)
> +{
> +        spin_lock_irq(&rq->engine->sched_engine->lock);
> +        list_del_init(&rq->sched.link);
> +
> +        /* Prevent further __await_execution() registering a cb, then flush */
> +        set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> +
> +        spin_unlock_irq(&rq->engine->sched_engine->lock);
> +
> +        i915_request_notify_execute_cb_imm(rq);
> +}
> +
>  static void setup_common(struct intel_engine_cs *engine)
>  {
>          struct drm_i915_private *i915 = engine->i915;
> @@ -1069,6 +1088,9 @@ static void setup_common(struct intel_engine_cs *engine)
>          engine->reset.cancel = reset_cancel;
>          engine->reset.finish = reset_finish;
>
> +        engine->add_active_request = add_to_engine;
> +        engine->remove_active_request = remove_from_engine;
> +
>          engine->cops = &ring_context_ops;
>          engine->request_alloc = ring_request_alloc;
>          engine->bump_serial = ring_bump_serial;
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index fc5a65ab1937..5f86ff79c98c 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -235,6 +235,34 @@ static void mock_submit_request(struct i915_request *request)
>          spin_unlock_irqrestore(&engine->hw_lock, flags);
>  }
>
> +static void mock_add_to_engine(struct i915_request *rq)
> +{
> +        lockdep_assert_held(&rq->engine->sched_engine->lock);
> +        list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
> +}
> +
> +static void mock_remove_from_engine(struct i915_request *rq)
> +{
> +        struct intel_engine_cs *engine, *locked;
> +
> +        /*
> +         * Virtual engines complicate acquiring the engine timeline lock,
> +         * as their rq->engine pointer is not stable until under that
> +         * engine lock. The simple ploy we use is to take the lock then
> +         * check that the rq still belongs to the newly locked engine.
> +         */
> +
> +        locked = READ_ONCE(rq->engine);
> +        spin_lock_irq(&locked->sched_engine->lock);
> +        while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
> +                spin_unlock(&locked->sched_engine->lock);
> +                spin_lock(&engine->sched_engine->lock);
> +                locked = engine;
> +        }
> +        list_del_init(&rq->sched.link);
> +        spin_unlock_irq(&locked->sched_engine->lock);
> +}
> +
>  static void mock_reset_prepare(struct intel_engine_cs *engine)
>  {
>  }
> @@ -327,6 +355,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>          engine->base.emit_flush = mock_emit_flush;
>          engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
>          engine->base.submit_request = mock_submit_request;
> +        engine->base.add_active_request = mock_add_to_engine;
> +        engine->base.remove_active_request = mock_remove_from_engine;
>
>          engine->base.reset.prepare = mock_reset_prepare;
>          engine->base.reset.rewind = mock_reset_rewind;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 9f28899ff17f..f8a6077fa3f9 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1147,6 +1147,33 @@ static int guc_context_alloc(struct intel_context *ce)
>          return lrc_alloc(ce, ce->engine);
>  }
>
> +static void add_to_context(struct i915_request *rq)
> +{
> +        struct intel_context *ce = rq->context;
> +
> +        spin_lock(&ce->guc_active.lock);
> +        list_move_tail(&rq->sched.link, &ce->guc_active.requests);
> +        spin_unlock(&ce->guc_active.lock);
> +}
> +
> +static void remove_from_context(struct i915_request *rq)
> +{
> +        struct intel_context *ce = rq->context;
> +
> +        spin_lock_irq(&ce->guc_active.lock);
> +
> +        list_del_init(&rq->sched.link);
> +        clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +
> +        /* Prevent further __await_execution() registering a cb, then flush */
> +        set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> +
> +        spin_unlock_irq(&ce->guc_active.lock);
> +
> +        atomic_dec(&ce->guc_id_ref);
> +        i915_request_notify_execute_cb_imm(rq);
> +}
> +
>  static const struct intel_context_ops guc_context_ops = {
>          .alloc = guc_context_alloc,
>
> @@ -1567,6 +1594,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
>          engine->cops = &guc_context_ops;
>          engine->request_alloc = guc_request_alloc;
>          engine->bump_serial = guc_bump_serial;
> +        engine->add_active_request = add_to_context;
> +        engine->remove_active_request = remove_from_context;
>
>          engine->sched_engine->schedule = i915_schedule;
>
> @@ -1931,6 +1960,10 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>                   "v%dx%d", ve->base.class, count);
>          ve->base.context_size = sibling->context_size;
>
> +        ve->base.add_active_request =
> +                sibling->add_active_request;
> +        ve->base.remove_active_request =
> +                sibling->remove_active_request;
>          ve->base.emit_bb_start = sibling->emit_bb_start;
>          ve->base.emit_flush = sibling->emit_flush;
>          ve->base.emit_init_breadcrumb =
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index b3c792d55321..4eba848b20e3 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -182,7 +182,7 @@ static bool irq_work_imm(struct irq_work *wrk)
>          return false;
>  }
>
> -static void __notify_execute_cb_imm(struct i915_request *rq)
> +void i915_request_notify_execute_cb_imm(struct i915_request *rq)
>  {
>          __notify_execute_cb(rq, irq_work_imm);
>  }
> @@ -256,37 +256,6 @@ i915_request_active_engine(struct i915_request *rq,
>          return ret;
>  }
>
> -
> -static void remove_from_engine(struct i915_request *rq)
> -{
> -        struct intel_engine_cs *engine, *locked;
> -
> -        /*
> -         * Virtual engines complicate acquiring the engine timeline lock,
> -         * as their rq->engine pointer is not stable until under that
> -         * engine lock. The simple ploy we use is to take the lock then
> -         * check that the rq still belongs to the newly locked engine.
> -         */
> -        locked = READ_ONCE(rq->engine);
> -        spin_lock_irq(&locked->sched_engine->lock);
> -        while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
> -                spin_unlock(&locked->sched_engine->lock);
> -                spin_lock(&engine->sched_engine->lock);
> -                locked = engine;
> -        }
> -        list_del_init(&rq->sched.link);
> -
> -        clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> -        clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> -
> -        /* Prevent further __await_execution() registering a cb, then flush */
> -        set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> -
> -        spin_unlock_irq(&locked->sched_engine->lock);
> -
> -        __notify_execute_cb_imm(rq);
> -}
> -
>  static void __rq_init_watchdog(struct i915_request *rq)
>  {
>          rq->watchdog.timer.function = NULL;
> @@ -383,9 +352,7 @@ bool i915_request_retire(struct i915_request *rq)
>           * after removing the breadcrumb and signaling it, so that we do not
>           * inadvertently attach the breadcrumb to a completed request.
>           */
> -        if (!list_empty(&rq->sched.link))
> -                remove_from_engine(rq);
> -        atomic_dec(&rq->context->guc_id_ref);
> +        rq->engine->remove_active_request(rq);
>          GEM_BUG_ON(!llist_empty(&rq->execute_cb));
>
>          __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
> @@ -516,7 +483,7 @@ __await_execution(struct i915_request *rq,
>          if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
>                  if (i915_request_is_active(signal) ||
>                      __request_in_flight(signal))
> -                        __notify_execute_cb_imm(signal);
> +                        i915_request_notify_execute_cb_imm(signal);
>          }
>
>          return 0;
> @@ -653,7 +620,7 @@ bool __i915_request_submit(struct i915_request *request)
>          result = true;
>
>          GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
> -        list_move_tail(&request->sched.link, &engine->sched_engine->requests);
> +        engine->add_active_request(request);
>  active:
>          clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
>          set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 717e5b292046..128030f43bbf 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -647,4 +647,6 @@ bool
>  i915_request_active_engine(struct i915_request *rq,
>                             struct intel_engine_cs **active);
>
> +void i915_request_notify_execute_cb_imm(struct i915_request *rq);
> +
>  #endif /* I915_REQUEST_H */
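The heart of the patch is the indirection itself: common code in __i915_request_submit() and i915_request_retire() now calls through engine->add_active_request() / engine->remove_active_request() rather than touching the engine's request list directly, and each backend wires those hooks to wherever its tracking lives. A minimal, self-contained userspace sketch of that shape (all names here are hypothetical stand-ins, not the i915 code):

```c
/* Sketch only: per-backend active-request tracking behind vfuncs. */
#include <stdio.h>

struct request;

struct engine {
	const char *name;
	int engine_active;	/* stand-in for sched_engine->requests */
	void (*add_active_request)(struct request *rq);
	void (*remove_active_request)(struct request *rq);
};

struct context {
	int context_active;	/* stand-in for guc_active.requests */
};

struct request {
	struct engine *engine;
	struct context *context;
};

/* execlists/ring style: track on the physical engine */
static void engine_add(struct request *rq)    { rq->engine->engine_active++; }
static void engine_remove(struct request *rq) { rq->engine->engine_active--; }

/* GuC style: track on the context instead */
static void context_add(struct request *rq)    { rq->context->context_active++; }
static void context_remove(struct request *rq) { rq->context->context_active--; }

/* common code no longer assumes where the active list lives */
static void submit(struct request *rq) { rq->engine->add_active_request(rq); }
static void retire(struct request *rq) { rq->engine->remove_active_request(rq); }

int main(void)
{
	struct engine execlists = { "rcs0", 0, engine_add, engine_remove };
	struct engine guc = { "guc", 0, context_add, context_remove };
	struct context ctx = { 0 };
	struct request r1 = { &execlists, &ctx }, r2 = { &guc, &ctx };

	submit(&r1); submit(&r2);
	printf("engine=%d context=%d\n", execlists.engine_active, ctx.context_active);
	retire(&r1); retire(&r2);
	printf("engine=%d context=%d\n", execlists.engine_active, ctx.context_active);
	return 0;
}
```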
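The one non-obvious piece the execlists and mock backends share is the locking ploy in remove_from_engine(): for a virtual engine, rq->engine is not stable until that engine's lock is held, so the code locks the engine it last saw and then re-checks the pointer. A sketch of the same pattern in isolation (C11 atomics and a pthread mutex standing in for READ_ONCE() and the sched_engine spinlock; all names are illustrative, not the i915 API):

```c
/* Sketch only: "lock, then re-check the pointer" for an unstable owner. */
#include <pthread.h>
#include <stdatomic.h>

struct engine {
	pthread_mutex_t lock;
};

struct request {
	_Atomic(struct engine *) engine;	/* may be repointed until locked */
};

static struct engine *lock_request_engine(struct request *rq)
{
	struct engine *locked, *engine;

	locked = atomic_load(&rq->engine);
	pthread_mutex_lock(&locked->lock);
	while ((engine = atomic_load(&rq->engine)) != locked) {
		/* the request migrated: drop the stale lock, chase the new one */
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&engine->lock);
		locked = engine;
	}
	/* rq->engine == locked, and it cannot change while we hold the lock */
	return locked;
}

static void remove_request(struct request *rq)
{
	struct engine *locked = lock_request_engine(rq);

	/* ... delete rq from the engine's tracking list here ... */
	pthread_mutex_unlock(&locked->lock);
}

int main(void)
{
	struct engine e0 = { PTHREAD_MUTEX_INITIALIZER };
	struct request rq = { &e0 };

	remove_request(&rq);	/* single-threaded demo: no migration occurs */
	return 0;
}
```

The loop terminates because once the matching lock is held, whatever repoints rq->engine must itself take that lock first; the GuC backend sidesteps the problem entirely by keying its tracking off the context, whose lock ownership never moves.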