[1/6] drm/i915/gt: Protect signaler walk with RCU

Message ID 20200220075025.1539375-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [1/6] drm/i915/gt: Protect signaler walk with RCU

Commit Message

Chris Wilson Feb. 20, 2020, 7:50 a.m. UTC
While we know that the waiters cannot disappear as we walk our list
(only that they might be added), the same cannot be said for our
signalers, as they may be completed by the HW and retired while we
process this request. Ergo we need to use RCU to protect the list
iteration and remember to mark the removals with list_del_rcu().

v2: Mark the deps as safe-for-rcu

Fixes: 793c22617367 ("drm/i915/gt: Protect execlists_hold/unhold from new waiters")
Fixes: 32ff621fd744 ("drm/i915/gt: Allow temporary suspension of inflight requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c   | 16 ++++++++++------
 drivers/gpu/drm/i915/i915_scheduler.c |  7 ++++---
 2 files changed, 14 insertions(+), 9 deletions(-)
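
For readers less familiar with the idiom, the change amounts to the standard
RCU-protected list walk. A minimal generic sketch follows (the struct and
function names are invented for illustration; this is not the i915 code):
the reader brackets the iteration with rcu_read_lock() and rcu_read_unlock()
and iterates with list_for_each_entry_rcu(), while the writer unlinks
entries with list_del_rcu().

#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct dep {				/* stand-in for struct i915_dependency */
	struct list_head link;
	bool on_hold;
};

/* Reader: may run concurrently with remove_dep() on another CPU. */
static bool any_on_hold(struct list_head *head)
{
	struct dep *p;
	bool result = false;

	rcu_read_lock();
	list_for_each_entry_rcu(p, head, link) {
		if (READ_ONCE(p->on_hold)) {
			result = true;
			break;
		}
	}
	rcu_read_unlock();

	return result;
}

/*
 * Writer: unlike list_del(), list_del_rcu() leaves the entry's forward
 * pointer intact, so a walker already inside the list can still step
 * past the removed entry.
 */
static void remove_dep(struct dep *p)
{
	list_del_rcu(&p->link);
}

What list_del_rcu() alone does not guarantee is that the memory behind a
removed entry stays safe to dereference; the SLAB_TYPESAFE_BY_RCU flag
discussed in the review below supplies that part.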

Comments

Matthew Auld Feb. 20, 2020, 12:47 p.m. UTC | #1
On 20/02/2020 07:50, Chris Wilson wrote:
> [snip]
> @@ -526,7 +526,8 @@ static struct i915_global_scheduler global = { {
>   int __init i915_global_scheduler_init(void)
>   {
>   	global.slab_dependencies = KMEM_CACHE(i915_dependency,
> -					      SLAB_HWCACHE_ALIGN);
> +					      SLAB_HWCACHE_ALIGN |
> +					      SLAB_TYPESAFE_BY_RCU);

So, the claim is that we should be fine if the node is re-used and then 
initialised, even though there might exist a minuscule window where 
hold_request might still be able to see it, somehow?
Chris Wilson Feb. 20, 2020, 12:52 p.m. UTC | #2
Quoting Matthew Auld (2020-02-20 12:47:28)
> On 20/02/2020 07:50, Chris Wilson wrote:
> > [snip]
> 
> So, the claim is that we should be fine if the node is re-used and then 
> initialised, even though there might exist a minuscule window where 
> hold_request might still be able to see it, somehow?

Yes. That is my claim. The saving grace here is that the on-hold
transitions must go through the engine->active.lock, which we are
holding. Ergo hold_request() is safe.
-Chris
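
To make that argument concrete, here is a minimal sketch of the two
guarantees being combined (all names are generic stand-ins: struct dep for
i915_dependency, engine_lock for engine->active.lock; this is not the i915
code). SLAB_TYPESAFE_BY_RCU keeps the memory type-stable for RCU readers
even across free-and-reuse, and the lock serializes every on-hold
transition with the list walker:

#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct dep {
	struct list_head link;
	bool on_hold;			/* only flipped under engine_lock */
};

static struct kmem_cache *dep_cache;
static DEFINE_SPINLOCK(engine_lock);	/* stand-in for engine->active.lock */

static int __init dep_cache_init(void)
{
	/*
	 * SLAB_TYPESAFE_BY_RCU: a freed object may be recycled as a new
	 * struct dep within an RCU grace period, but the memory never
	 * changes type, so an RCU reader can still dereference it.
	 */
	dep_cache = KMEM_CACHE(dep, SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU);
	return dep_cache ? 0 : -ENOMEM;
}

static void set_hold(struct dep *p, bool hold)
{
	/*
	 * Every hold/unhold transition funnels through this lock, so a
	 * walker holding it cannot race a transition; at worst it sees
	 * a recycled entry whose on_hold is benignly false.
	 */
	spin_lock(&engine_lock);
	WRITE_ONCE(p->on_hold, hold);
	spin_unlock(&engine_lock);
}
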
Matthew Auld Feb. 20, 2020, 1:16 p.m. UTC | #3
On 20/02/2020 12:52, Chris Wilson wrote:
> Quoting Matthew Auld (2020-02-20 12:47:28)
>> On 20/02/2020 07:50, Chris Wilson wrote:
>>> [snip]
>>
>> So, the claim is that we should be fine if the node is re-used and then
>> initialised, even though there might exist a minuscule window where
>> hold_request might still be able to see it, somehow?
> 
> Yes. That is my claim. The saving grace here is that the on-hold
> transitions must go through the engine->active.lock, which we are
> holding. Ergo hold_request() is safe.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> -Chris
>
Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ba31cbe8c68e..47561dc29304 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1668,9 +1668,9 @@  last_active(const struct intel_engine_execlists *execlists)
 				     wait_link)
 
 #define for_each_signaler(p__, rq__) \
-	list_for_each_entry_lockless(p__, \
-				     &(rq__)->sched.signalers_list, \
-				     signal_link)
+	list_for_each_entry_rcu(p__, \
+				&(rq__)->sched.signalers_list, \
+				signal_link)
 
 static void defer_request(struct i915_request *rq, struct list_head * const pl)
 {
@@ -2533,11 +2533,13 @@  static bool execlists_hold(struct intel_engine_cs *engine,
 static bool hold_request(const struct i915_request *rq)
 {
 	struct i915_dependency *p;
+	bool result = false;
 
 	/*
 	 * If one of our ancestors is on hold, we must also be on hold,
 	 * otherwise we will bypass it and execute before it.
 	 */
+	rcu_read_lock();
 	for_each_signaler(p, rq) {
 		const struct i915_request *s =
 			container_of(p->signaler, typeof(*s), sched);
@@ -2545,11 +2547,13 @@  static bool hold_request(const struct i915_request *rq)
 		if (s->engine != rq->engine)
 			continue;
 
-		if (i915_request_on_hold(s))
-			return true;
+		result = i915_request_on_hold(s);
+		if (result)
+			break;
 	}
+	rcu_read_unlock();
 
-	return false;
+	return result;
 }
 
 static void __execlists_unhold(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e19a37a83397..59f70b674665 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -486,7 +486,7 @@  void i915_sched_node_fini(struct i915_sched_node *node)
 	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
-		list_del(&dep->wait_link);
+		list_del_rcu(&dep->wait_link);
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
@@ -497,7 +497,7 @@  void i915_sched_node_fini(struct i915_sched_node *node)
 		GEM_BUG_ON(dep->signaler != node);
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
-		list_del(&dep->signal_link);
+		list_del_rcu(&dep->signal_link);
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
@@ -526,7 +526,8 @@  static struct i915_global_scheduler global = { {
 int __init i915_global_scheduler_init(void)
 {
 	global.slab_dependencies = KMEM_CACHE(i915_dependency,
-					      SLAB_HWCACHE_ALIGN);
+					      SLAB_HWCACHE_ALIGN |
+					      SLAB_TYPESAFE_BY_RCU);
 	if (!global.slab_dependencies)
 		return -ENOMEM;