diff mbox series

[4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock

Message ID 20200807083256.32761-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() | expand

Commit Message

Chris Wilson Aug. 7, 2020, 8:32 a.m. UTC
Make b->signaled_requests a lockless-list so that we can manipulate it
outside of the b->irq_lock.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 33 ++++++++++++-------
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  2 +-
 drivers/gpu/drm/i915/i915_request.h           |  6 +++-
 3 files changed, 27 insertions(+), 14 deletions(-)

Comments

Tvrtko Ursulin Aug. 7, 2020, 11:26 a.m. UTC | #1
On 07/08/2020 09:32, Chris Wilson wrote:
> Make b->signaled_requests a lockless-list so that we can manipulate it
> outside of the b->irq_lock.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 33 ++++++++++++-------
>   .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  2 +-
>   drivers/gpu/drm/i915/i915_request.h           |  6 +++-
>   3 files changed, 27 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 6c321419441f..98923344f491 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
>   		intel_engine_add_retire(b->irq_engine, tl);
>   }
>   
> -static bool __signal_request(struct i915_request *rq, struct list_head *signals)
> +static bool __signal_request(struct i915_request *rq)
>   {
> -	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> -
>   	if (!__dma_fence_signal(&rq->fence)) {
>   		i915_request_put(rq);
>   		return false;
>   	}
>   
> -	list_add_tail(&rq->signal_link, signals);
>   	return true;
>   }
>   
> +static struct llist_node *
> +__llist_add(struct llist_node *node, struct llist_node *head)
> +{
> +	node->next = head;
> +	return node;
> +}
> +
>   static void signal_irq_work(struct irq_work *work)
>   {
>   	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
>   	const ktime_t timestamp = ktime_get();
> +	struct llist_node *signal, *sn;
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
> -	LIST_HEAD(signal);
> +
> +	signal = NULL;
> +	if (unlikely(!llist_empty(&b->signaled_requests)))
> +		signal = llist_del_all(&b->signaled_requests);
>   
>   	spin_lock(&b->irq_lock);
>   
> @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work)
>   	if (b->irq_armed && list_empty(&b->signalers))
>   		__intel_breadcrumbs_disarm_irq(b);
>   
> -	list_splice_init(&b->signaled_requests, &signal);
> -
>   	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
>   		GEM_BUG_ON(list_empty(&ce->signals));
>   
> @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
>   			 * spinlock as the callback chain may end up adding
>   			 * more signalers to the same context or engine.
>   			 */
> -			__signal_request(rq, &signal);
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +			if (__signal_request(rq))
> +				signal = __llist_add(&rq->signal_node, signal);

Presumably here you count on no possible races, allowing for a more 
optimized, custom, __llist_add. It needs a comment at minimum, or even 
better just use llist_add.

Regards,

Tvrtko

>   		}
>   
>   		/*
> @@ -262,9 +270,9 @@ static void signal_irq_work(struct irq_work *work)
>   
>   	spin_unlock(&b->irq_lock);
>   
> -	list_for_each_safe(pos, next, &signal) {
> +	llist_for_each_safe(signal, sn, signal) {
>   		struct i915_request *rq =
> -			list_entry(pos, typeof(*rq), signal_link);
> +			llist_entry(signal, typeof(*rq), signal_node);
>   		struct list_head cb_list;
>   
>   		spin_lock(&rq->lock);
> @@ -291,7 +299,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
>   
>   	spin_lock_init(&b->irq_lock);
>   	INIT_LIST_HEAD(&b->signalers);
> -	INIT_LIST_HEAD(&b->signaled_requests);
> +	init_llist_head(&b->signaled_requests);
>   
>   	init_irq_work(&b->irq_work, signal_irq_work);
>   
> @@ -352,7 +360,8 @@ static void insert_breadcrumb(struct i915_request *rq,
>   	 * its signal completion.
>   	 */
>   	if (__request_completed(rq)) {
> -		if (__signal_request(rq, &b->signaled_requests))
> +		if (__signal_request(rq) &&
> +		    llist_add(&rq->signal_node, &b->signaled_requests))
>   			irq_work_queue(&b->irq_work);
>   		return;
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> index 8e53b9942695..3fa19820b37a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> @@ -35,7 +35,7 @@ struct intel_breadcrumbs {
>   	struct intel_engine_cs *irq_engine;
>   
>   	struct list_head signalers;
> -	struct list_head signaled_requests;
> +	struct llist_head signaled_requests;
>   
>   	struct irq_work irq_work; /* for use from inside irq_lock */
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 16b721080195..874af6db6103 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -176,7 +176,11 @@ struct i915_request {
>   	struct intel_context *context;
>   	struct intel_ring *ring;
>   	struct intel_timeline __rcu *timeline;
> -	struct list_head signal_link;
> +
> +	union {
> +		struct list_head signal_link;
> +		struct llist_node signal_node;
> +	};
>   
>   	/*
>   	 * The rcu epoch of when this request was allocated. Used to judiciously
>
Chris Wilson Aug. 7, 2020, 11:54 a.m. UTC | #2
Quoting Tvrtko Ursulin (2020-08-07 12:26:41)
> 
> On 07/08/2020 09:32, Chris Wilson wrote:
> >   static void signal_irq_work(struct irq_work *work)
> >   {
> >       struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
> >       const ktime_t timestamp = ktime_get();
> > +     struct llist_node *signal, *sn;
> >       struct intel_context *ce, *cn;
> >       struct list_head *pos, *next;
> > -     LIST_HEAD(signal);
> > +
> > +     signal = NULL;
> > +     if (unlikely(!llist_empty(&b->signaled_requests)))
> > +             signal = llist_del_all(&b->signaled_requests);
> > @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
> >                        * spinlock as the callback chain may end up adding
> >                        * more signalers to the same context or engine.
> >                        */
> > -                     __signal_request(rq, &signal);
> > +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> > +                     if (__signal_request(rq))
> > +                             signal = __llist_add(&rq->signal_node, signal);
> 
> Presumably here you count on no possible races, allowing for a more 
> optimized, custom, __llist_add. It needs a comment at minimum, or even 
> better just use llist_add.

It's a purely local singly linked list here. We own the
request->signal_node as that is locked by the b->irq_lock and the
clear_bit, and signal is a local variable.
-Chris
Tvrtko Ursulin Aug. 7, 2020, 11:56 a.m. UTC | #3
On 07/08/2020 12:54, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-08-07 12:26:41)
>>
>> On 07/08/2020 09:32, Chris Wilson wrote:
>>>    static void signal_irq_work(struct irq_work *work)
>>>    {
>>>        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
>>>        const ktime_t timestamp = ktime_get();
>>> +     struct llist_node *signal, *sn;
>>>        struct intel_context *ce, *cn;
>>>        struct list_head *pos, *next;
>>> -     LIST_HEAD(signal);
>>> +
>>> +     signal = NULL;
>>> +     if (unlikely(!llist_empty(&b->signaled_requests)))
>>> +             signal = llist_del_all(&b->signaled_requests);
>>> @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
>>>                         * spinlock as the callback chain may end up adding
>>>                         * more signalers to the same context or engine.
>>>                         */
>>> -                     __signal_request(rq, &signal);
>>> +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>>> +                     if (__signal_request(rq))
>>> +                             signal = __llist_add(&rq->signal_node, signal);
>>
>> Presumably here you count on no possible races, allowing for a more
>> optimized, custom, __llist_add. It needs a comment at minimum, or even
>> better just use llist_add.
> 
> It's a purely local singly linked list here. We own the
> request->signal_node as that is locked by the b->irq_lock and the
> clear_bit, and signal is a local variable.

True.. just a comment then please and with that:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 6c321419441f..98923344f491 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -173,26 +173,34 @@  static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 		intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
 {
-	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
 	if (!__dma_fence_signal(&rq->fence)) {
 		i915_request_put(rq);
 		return false;
 	}
 
-	list_add_tail(&rq->signal_link, signals);
 	return true;
 }
 
+static struct llist_node *
+__llist_add(struct llist_node *node, struct llist_node *head)
+{
+	node->next = head;
+	return node;
+}
+
 static void signal_irq_work(struct irq_work *work)
 {
 	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
+	struct llist_node *signal, *sn;
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
-	LIST_HEAD(signal);
+
+	signal = NULL;
+	if (unlikely(!llist_empty(&b->signaled_requests)))
+		signal = llist_del_all(&b->signaled_requests);
 
 	spin_lock(&b->irq_lock);
 
@@ -224,8 +232,6 @@  static void signal_irq_work(struct irq_work *work)
 	if (b->irq_armed && list_empty(&b->signalers))
 		__intel_breadcrumbs_disarm_irq(b);
 
-	list_splice_init(&b->signaled_requests, &signal);
-
 	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
 		GEM_BUG_ON(list_empty(&ce->signals));
 
@@ -242,7 +248,9 @@  static void signal_irq_work(struct irq_work *work)
 			 * spinlock as the callback chain may end up adding
 			 * more signalers to the same context or engine.
 			 */
-			__signal_request(rq, &signal);
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+			if (__signal_request(rq))
+				signal = __llist_add(&rq->signal_node, signal);
 		}
 
 		/*
@@ -262,9 +270,9 @@  static void signal_irq_work(struct irq_work *work)
 
 	spin_unlock(&b->irq_lock);
 
-	list_for_each_safe(pos, next, &signal) {
+	llist_for_each_safe(signal, sn, signal) {
 		struct i915_request *rq =
-			list_entry(pos, typeof(*rq), signal_link);
+			llist_entry(signal, typeof(*rq), signal_node);
 		struct list_head cb_list;
 
 		spin_lock(&rq->lock);
@@ -291,7 +299,7 @@  intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 
 	spin_lock_init(&b->irq_lock);
 	INIT_LIST_HEAD(&b->signalers);
-	INIT_LIST_HEAD(&b->signaled_requests);
+	init_llist_head(&b->signaled_requests);
 
 	init_irq_work(&b->irq_work, signal_irq_work);
 
@@ -352,7 +360,8 @@  static void insert_breadcrumb(struct i915_request *rq,
 	 * its signal completion.
 	 */
 	if (__request_completed(rq)) {
-		if (__signal_request(rq, &b->signaled_requests))
+		if (__signal_request(rq) &&
+		    llist_add(&rq->signal_node, &b->signaled_requests))
 			irq_work_queue(&b->irq_work);
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 8e53b9942695..3fa19820b37a 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -35,7 +35,7 @@  struct intel_breadcrumbs {
 	struct intel_engine_cs *irq_engine;
 
 	struct list_head signalers;
-	struct list_head signaled_requests;
+	struct llist_head signaled_requests;
 
 	struct irq_work irq_work; /* for use from inside irq_lock */
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 16b721080195..874af6db6103 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -176,7 +176,11 @@  struct i915_request {
 	struct intel_context *context;
 	struct intel_ring *ring;
 	struct intel_timeline __rcu *timeline;
-	struct list_head signal_link;
+
+	union {
+		struct list_head signal_link;
+		struct llist_node signal_node;
+	};
 
 	/*
 	 * The rcu epoch of when this request was allocated. Used to judiciously