Message ID | 20200807083256.32761-4-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() | expand |
On 07/08/2020 09:32, Chris Wilson wrote: > Make b->signaled_requests a lockless-list so that we can manipulate it > outside of the b->irq_lock. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 33 ++++++++++++------- > .../gpu/drm/i915/gt/intel_breadcrumbs_types.h | 2 +- > drivers/gpu/drm/i915/i915_request.h | 6 +++- > 3 files changed, 27 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c > index 6c321419441f..98923344f491 100644 > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c > @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) > intel_engine_add_retire(b->irq_engine, tl); > } > > -static bool __signal_request(struct i915_request *rq, struct list_head *signals) > +static bool __signal_request(struct i915_request *rq) > { > - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); > - > if (!__dma_fence_signal(&rq->fence)) { > i915_request_put(rq); > return false; > } > > - list_add_tail(&rq->signal_link, signals); > return true; > } > > +static struct llist_node * > +__llist_add(struct llist_node *node, struct llist_node *head) > +{ > + node->next = head; > + return node; > +} > + > static void signal_irq_work(struct irq_work *work) > { > struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); > const ktime_t timestamp = ktime_get(); > + struct llist_node *signal, *sn; > struct intel_context *ce, *cn; > struct list_head *pos, *next; > - LIST_HEAD(signal); > + > + signal = NULL; > + if (unlikely(!llist_empty(&b->signaled_requests))) > + signal = llist_del_all(&b->signaled_requests); > > spin_lock(&b->irq_lock); > > @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work) > if (b->irq_armed && list_empty(&b->signalers)) > __intel_breadcrumbs_disarm_irq(b); > > - list_splice_init(&b->signaled_requests, &signal); > - > list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { > GEM_BUG_ON(list_empty(&ce->signals)); > > @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work) > * spinlock as the callback chain may end up adding > * more signalers to the same context or engine. > */ > - __signal_request(rq, &signal); > + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); > + if (__signal_request(rq)) > + signal = __llist_add(&rq->signal_node, signal); Presumably here you count on no possible races, allowing for a more optimized, custom, __llist_add. It needs a comment at minimum, or even better just use llist_add. Regards, Tvrtko > } > > /* > @@ -262,9 +270,9 @@ static void signal_irq_work(struct irq_work *work) > > spin_unlock(&b->irq_lock); > > - list_for_each_safe(pos, next, &signal) { > + llist_for_each_safe(signal, sn, signal) { > struct i915_request *rq = > - list_entry(pos, typeof(*rq), signal_link); > + llist_entry(signal, typeof(*rq), signal_node); > struct list_head cb_list; > > spin_lock(&rq->lock); > @@ -291,7 +299,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) > > spin_lock_init(&b->irq_lock); > INIT_LIST_HEAD(&b->signalers); > - INIT_LIST_HEAD(&b->signaled_requests); > + init_llist_head(&b->signaled_requests); > > init_irq_work(&b->irq_work, signal_irq_work); > > @@ -352,7 +360,8 @@ static void insert_breadcrumb(struct i915_request *rq, > * its signal completion. > */ > if (__request_completed(rq)) { > - if (__signal_request(rq, &b->signaled_requests)) > + if (__signal_request(rq) && > + llist_add(&rq->signal_node, &b->signaled_requests)) > irq_work_queue(&b->irq_work); > return; > } > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h > index 8e53b9942695..3fa19820b37a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h > @@ -35,7 +35,7 @@ struct intel_breadcrumbs { > struct intel_engine_cs *irq_engine; > > struct list_head signalers; > - struct list_head signaled_requests; > + struct llist_head signaled_requests; > > struct irq_work irq_work; /* for use from inside irq_lock */ > > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 16b721080195..874af6db6103 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -176,7 +176,11 @@ struct i915_request { > struct intel_context *context; > struct intel_ring *ring; > struct intel_timeline __rcu *timeline; > - struct list_head signal_link; > + > + union { > + struct list_head signal_link; > + struct llist_node signal_node; > + }; > > /* > * The rcu epoch of when this request was allocated. Used to judiciously >
Quoting Tvrtko Ursulin (2020-08-07 12:26:41) > > On 07/08/2020 09:32, Chris Wilson wrote: > > static void signal_irq_work(struct irq_work *work) > > { > > struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); > > const ktime_t timestamp = ktime_get(); > > + struct llist_node *signal, *sn; > > struct intel_context *ce, *cn; > > struct list_head *pos, *next; > > - LIST_HEAD(signal); > > + > > + signal = NULL; > > + if (unlikely(!llist_empty(&b->signaled_requests))) > > + signal = llist_del_all(&b->signaled_requests); > > @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work) > > * spinlock as the callback chain may end up adding > > * more signalers to the same context or engine. > > */ > > - __signal_request(rq, &signal); > > + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); > > + if (__signal_request(rq)) > > + signal = __llist_add(&rq->signal_node, signal); > > Presumably here you count on no possible races, allowing for a more > optimized, custom, __llist_add. It needs a comment at minimum, or even > better just use llist_add. It's a purely local singly linked list here. We own the request->signal_node as that is locked by the b->irq_lock and the clear_bit, and signal is a local variable. -Chris
On 07/08/2020 12:54, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-08-07 12:26:41) >> >> On 07/08/2020 09:32, Chris Wilson wrote: >>> static void signal_irq_work(struct irq_work *work) >>> { >>> struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); >>> const ktime_t timestamp = ktime_get(); >>> + struct llist_node *signal, *sn; >>> struct intel_context *ce, *cn; >>> struct list_head *pos, *next; >>> - LIST_HEAD(signal); >>> + >>> + signal = NULL; >>> + if (unlikely(!llist_empty(&b->signaled_requests))) >>> + signal = llist_del_all(&b->signaled_requests); >>> @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work) >>> * spinlock as the callback chain may end up adding >>> * more signalers to the same context or engine. >>> */ >>> - __signal_request(rq, &signal); >>> + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); >>> + if (__signal_request(rq)) >>> + signal = __llist_add(&rq->signal_node, signal); >> >> Presumably here you count on no possible races, allowing for a more >> optimized, custom, __llist_add. It needs a comment at minimum, or even >> better just use llist_add. > > It's a purely local singly linked list here. We own the > request->signal_node as that is locked by the b->irq_lock and the > clear_bit, and signal is a local variable. True.. just a comment then please and with that: Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 6c321419441f..98923344f491 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(b->irq_engine, tl); } -static bool __signal_request(struct i915_request *rq, struct list_head *signals) +static bool __signal_request(struct i915_request *rq) { - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - if (!__dma_fence_signal(&rq->fence)) { i915_request_put(rq); return false; } - list_add_tail(&rq->signal_link, signals); return true; } +static struct llist_node * +__llist_add(struct llist_node *node, struct llist_node *head) +{ + node->next = head; + return node; +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); + struct llist_node *signal, *sn; struct intel_context *ce, *cn; struct list_head *pos, *next; - LIST_HEAD(signal); + + signal = NULL; + if (unlikely(!llist_empty(&b->signaled_requests))) + signal = llist_del_all(&b->signaled_requests); spin_lock(&b->irq_lock); @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); - list_splice_init(&b->signaled_requests, &signal); - list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work) * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - __signal_request(rq, &signal); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + if (__signal_request(rq)) + signal = __llist_add(&rq->signal_node, signal); } /* @@ -262,9 +270,9 @@ static void signal_irq_work(struct irq_work *work) spin_unlock(&b->irq_lock); - list_for_each_safe(pos, next, &signal) { + llist_for_each_safe(signal, sn, signal) { struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); + llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; spin_lock(&rq->lock); @@ -291,7 +299,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); - INIT_LIST_HEAD(&b->signaled_requests); + init_llist_head(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); @@ -352,7 +360,8 @@ static void insert_breadcrumb(struct i915_request *rq, * its signal completion. */ if (__request_completed(rq)) { - if (__signal_request(rq, &b->signaled_requests)) + if (__signal_request(rq) && + llist_add(&rq->signal_node, &b->signaled_requests)) irq_work_queue(&b->irq_work); return; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 8e53b9942695..3fa19820b37a 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -35,7 +35,7 @@ struct intel_breadcrumbs { struct intel_engine_cs *irq_engine; struct list_head signalers; - struct list_head signaled_requests; + struct llist_head signaled_requests; struct irq_work irq_work; /* for use from inside irq_lock */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 16b721080195..874af6db6103 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -176,7 +176,11 @@ struct i915_request { struct intel_context *context; struct intel_ring *ring; struct intel_timeline __rcu *timeline; - struct list_head signal_link; + + union { + struct list_head signal_link; + struct llist_node signal_node; + }; /* * The rcu epoch of when this request was allocated. Used to judiciously
Make b->signaled_requests a lockless-list so that we can manipulate it outside of the b->irq_lock. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 33 ++++++++++++------- .../gpu/drm/i915/gt/intel_breadcrumbs_types.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 6 +++- 3 files changed, 27 insertions(+), 14 deletions(-)