[v2] drm/i915: Reacquire priolist cache after dropping the engine lock

Message ID 20190211160953.32659-1-chris@chris-wilson.co.uk
State New

Commit Message

Chris Wilson Feb. 11, 2019, 4:09 p.m. UTC
If we drop the engine lock, we may run execlists_dequeue which may free
the priolist. Therefore if we ever drop the execution lock on the
engine, we have to discard our cache and refetch the priolist to ensure
we do not use a stale pointer.

[  506.418935] [IGT] gem_exec_whisper: starting subtest contexts-priority
[  593.240825] general protection fault: 0000 [#1] SMP
[  593.240863] CPU: 1 PID: 494 Comm: gem_exec_whispe Tainted: G     U            5.0.0-rc6+ #100
[  593.240879] Hardware name:  /NUC6CAYB, BIOS AYAPLCEL.86A.0029.2016.1124.1625 11/24/2016
[  593.240965] RIP: 0010:__i915_schedule+0x1fe/0x320 [i915]
[  593.240981] Code: 48 8b 0c 24 48 89 c3 49 8b 45 28 49 8b 75 20 4c 89 3c 24 48 89 46 08 48 89 30 48 8b 43 08 48 89 4b 08 49 89 5d 20 49 89 45 28 <48> 89 08 45 39 a7 b8 03 00 00 7d 44 45 89 a7 b8 03 00 00 49 8b 85
[  593.240999] RSP: 0018:ffffc90000057a60 EFLAGS: 00010046
[  593.241013] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8882582d7870 RCX: ffff88826baba6f0
[  593.241026] RDX: 0000000000000000 RSI: ffff8882582d6e70 RDI: ffff888273482194
[  593.241049] RBP: ffffc90000057a68 R08: ffff8882582d7680 R09: ffff8882582d7840
[  593.241068] R10: 0000000000000000 R11: ffffea00095ebe08 R12: 0000000000000728
[  593.241105] R13: ffff88826baba6d0 R14: ffffc90000057a40 R15: ffff888273482158
[  593.241120] FS:  00007f4613fb3900(0000) GS:ffff888277a80000(0000) knlGS:0000000000000000
[  593.241133] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  593.241146] CR2: 00007f57d3c66a84 CR3: 000000026e2b6000 CR4: 00000000001406e0
[  593.241158] Call Trace:
[  593.241233]  i915_schedule+0x1f/0x30 [i915]
[  593.241326]  i915_request_add+0x1a9/0x290 [i915]
[  593.241393]  i915_gem_do_execbuffer+0x45f/0x1150 [i915]
[  593.241411]  ? init_object+0x49/0x80
[  593.241425]  ? ___slab_alloc.constprop.91+0x4b8/0x4e0
[  593.241491]  ? i915_gem_execbuffer2_ioctl+0x99/0x380 [i915]
[  593.241563]  ? i915_gem_execbuffer_ioctl+0x270/0x270 [i915]
[  593.241629]  i915_gem_execbuffer2_ioctl+0x1bb/0x380 [i915]
[  593.241705]  ? i915_gem_execbuffer_ioctl+0x270/0x270 [i915]
[  593.241724]  drm_ioctl_kernel+0x81/0xd0
[  593.241738]  drm_ioctl+0x1a7/0x310
[  593.241803]  ? i915_gem_execbuffer_ioctl+0x270/0x270 [i915]
[  593.241819]  ? __update_load_avg_se+0x1c9/0x240
[  593.241834]  ? pick_next_entity+0x7e/0x120
[  593.241851]  do_vfs_ioctl+0x88/0x5d0
[  593.241880]  ksys_ioctl+0x35/0x70
[  593.241894]  __x64_sys_ioctl+0x11/0x20
[  593.241907]  do_syscall_64+0x44/0xf0
[  593.241924]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  593.241940] RIP: 0033:0x7f4615ffe757
[  593.241952] Code: 00 00 90 48 8b 05 39 a7 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 09 a7 0c 00 f7 d8 64 89 01 48
[  593.241970] RSP: 002b:00007ffc1030ddf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  593.241984] RAX: ffffffffffffffda RBX: 00007ffc10324420 RCX: 00007f4615ffe757
[  593.241997] RDX: 00007ffc1030e220 RSI: 0000000040406469 RDI: 0000000000000003
[  593.242010] RBP: 00007ffc1030e220 R08: 00007f46160c9208 R09: 00007f46160c9240
[  593.242022] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000040406469
[  593.242038] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000
[  593.242058] Modules linked in: i915 intel_gtt drm_kms_helper prime_numbers

v2: Track the local engine cache and explicitly clear it when switching
engine locks.

Fixes: a02eb975be78 ("drm/i915/execlists: Cache the priolist when rescheduling")
Testcase: igt/gem_exec_whisper/contexts-priority # rare!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)
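
A note on the oops above: RAX holding 6b6b6b6b6b6b6b6b is the slab
POISON_FREE pattern, i.e. the cached priolist pointer was dereferenced
after the object behind it had already been freed. The shape of the fix
can be sketched outside the driver; the following is a minimal
user-space C sketch using pthreads in place of the engine timeline
spinlocks, with hypothetical names throughout -- an illustration of the
invalidate-on-lock-switch idea, not the driver code:

#include <pthread.h>
#include <string.h>

/* Hypothetical stand-ins for the driver types; not the i915 code. */
struct priolist;

struct engine {
	pthread_mutex_t lock;	/* stands in for engine->timeline.lock */
};

/*
 * Pointers cached in here are only valid for as long as the lock they
 * were looked up under remains held.
 */
struct sched_cache {
	struct priolist *priolist;
};

static struct engine *
lock_engine(struct engine *next, struct engine *locked,
	    struct sched_cache *cache)
{
	if (next != locked) {
		pthread_mutex_unlock(&locked->lock);
		/*
		 * The moment the old lock is dropped, another thread may
		 * dequeue and free whatever we looked up under it, so
		 * forget the cache before doing anything else.
		 */
		memset(cache, 0, sizeof(*cache));
		pthread_mutex_lock(&next->lock);
	}

	return next;
}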

Comments

Tvrtko Ursulin Feb. 11, 2019, 5:33 p.m. UTC | #1
On 11/02/2019 16:09, Chris Wilson wrote:
> If we drop the engine lock, we may run execlists_dequeue which may free
> the priolist. Therefore if we ever drop the execution lock on the
> engine, we have to discard our cache and refetch the priolist to ensure
> we do not use a stale pointer.

On first look one would observe that the current code already tries to re-acquire the priolist on changing the engine, so I guess the problem is that this check is hidden under an additional conditional.

So a simpler approach of:

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index d01683167c77..74896e7dbfa7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -341,16 +341,17 @@ static void __i915_schedule(struct i915_request *rq,
                engine = sched_lock_engine(node, engine);
                lockdep_assert_held(&engine->timeline.lock);
 
+               if (last != engine) {
+                       pl = i915_sched_lookup_priolist(engine, prio);
+                       last = engine;
+               }
+
                /* Recheck after acquiring the engine->timeline.lock */
                if (prio <= node->attr.priority || node_signaled(node))
                        continue;
 
                node->attr.priority = prio;
                if (!list_empty(&node->link)) {
-                       if (last != engine) {
-                               pl = i915_sched_lookup_priolist(engine, prio);
-                               last = engine;
-                       }
                        list_move_tail(&node->link, pl);
                } else {
                        /*

Would that not be acceptable?

> 
> [snip oops, tags and diffstat]
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index d01683167c77..8bc042551692 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -223,8 +223,14 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
>   	return &p->requests[idx];
>   }
>   
> +struct sched_cache {
> +	struct list_head *priolist;
> +};

Do you plan to add more stuff, since you decided to wrap it into a
struct and use memset?

Regards,

Tvrtko

> [snip remainder of quoted patch]
Chris Wilson Feb. 11, 2019, 5:44 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-02-11 17:33:37)
> 
> On 11/02/2019 16:09, Chris Wilson wrote:
> > If we drop the engine lock, we may run execlists_dequeue which may free
> > the priolist. Therefore if we ever drop the execution lock on the
> > engine, we have to discard our cache and refetch the priolist to ensure
> > we do not use a stale pointer.
> 
> On first look one would observe that the current code already tries to re-acquire the priolist on changing the engine, so I guess the problem is that this check is hidden under an additional conditional.
> 
> So a simpler approach of:
> 
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index d01683167c77..74896e7dbfa7 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -341,16 +341,17 @@ static void __i915_schedule(struct i915_request *rq,
>                 engine = sched_lock_engine(node, engine);
>                 lockdep_assert_held(&engine->timeline.lock);
>  
> +               if (last != engine) {
> +                       pl = i915_sched_lookup_priolist(engine, prio);
> +                       last = engine;
> +               }
> +
>                 /* Recheck after acquiring the engine->timeline.lock */
>                 if (prio <= node->attr.priority || node_signaled(node))
>                         continue;
>  
>                 node->attr.priority = prio;
>                 if (!list_empty(&node->link)) {
> -                       if (last != engine) {
> -                               pl = i915_sched_lookup_priolist(engine, prio);
> -                               last = engine;
> -                       }
>                         list_move_tail(&node->link, pl);
>                 } else {
>                         /*
> Would that not be acceptable?

It marks the priolist as used even though we may not populate it. That
is the extra conditional biting again, and it defeats the purpose of not
having to look up the priolist as often (for engines where we don't need
to touch the priority queue itself).
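
Concretely, the v2 shape defers the (potentially allocating) lookup
until the first node that actually has to move, whereas hoisting it
above the recheck pays for it on every engine switch. A sketch of the
lazy form, with hypothetical names and no claim to match the driver's
types:

struct engine;
struct list_head;

/* Assumed helper: may create, and mark as in use, a priority bucket. */
struct list_head *lookup_priolist(struct engine *engine, int prio);

static struct list_head *
lazy_priolist(struct engine *engine, int prio, struct list_head **cached)
{
	/*
	 * Only the first mover after a lock switch pays for (and marks)
	 * the bucket; skipped nodes never touch the priority queue.
	 */
	if (!*cached)
		*cached = lookup_priolist(engine, prio);

	return *cached;
}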

> > [snip oops, tags and quoted patch context]
> > +struct sched_cache {
> > +     struct list_head *priolist;
> > +};
> 
> Do you plan to add more stuff, since you decided to wrap it into a
> struct and use memset?

Not right now, but I preferred the approach of having the switch of
engines also clear the associated state.

I've thought about sticking something else in there, but I can't
remember what -- it's the iterative DFS that gets highlighted in profiles
(not too surprisingly). Oh, I remember it was about trying to fiddle
with per-engine queue priority before inflight().
-Chris
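
The memset is what makes the struct wrapper pay off later: any field
subsequently added to sched_cache is discarded on an engine switch with
no change to the unlock path. A sketch with a hypothetical second member
(say, the per-engine queue priority mentioned above):

struct sched_cache {
	struct list_head *priolist;
	int queue_priority;	/* hypothetical future addition */
};

/*
 * sched_lock_engine() already does memset(cache, 0, sizeof(*cache))
 * when switching locks, so both members are discarded together.
 */
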
Tvrtko Ursulin Feb. 11, 2019, 5:55 p.m. UTC | #3
On 11/02/2019 17:44, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-02-11 17:33:37)
>>
>> On 11/02/2019 16:09, Chris Wilson wrote:
>>> If we drop the engine lock, we may run execlists_dequeue which may free
>>> the priolist. Therefore if we ever drop the execution lock on the
>>> engine, we have to discard our cache and refetch the priolist to ensure
>>> we do not use a stale pointer.
>>
>> On first look one would observe that the current code already tries to re-acquire the priolist on changing the engine, so I guess the problem is that this check is hidden under an additional conditional.
>>
>> So a simpler approach of:
>>
>> [snip duplicated proposal diff]
>> Would that not be acceptable?
> 
> It marks the priolist as used even though we may not populate it. That
> is the extra conditional biting again, and it defeats the purpose of not
> having to look up the priolist as often (for engines where we don't need
> to touch the priority queue itself).

Yep, my bad.

>>> [snip oops, tags and quoted patch context]
>>> +struct sched_cache {
>>> +     struct list_head *priolist;
>>> +};
>>
>> Do you plan to add more stuff, since you decided to wrap it into a
>> struct and use memset?
> 
> Not right now, but I preferred the approach of having the switch of
> engines also clear the associated state.
> 
> I've thought about sticking something else in there, but I can't
> remember what -- it's the iterative DFS that gets highlighted in profiles
> (not too surprisingly). Oh, I remember it was about trying to fiddle
> with per-engine queue priority before inflight().

I am not sure about wrapping at this point, but I guess it's passable, 
especially since it fixes a bug.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index d01683167c77..8bc042551692 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -223,8 +223,14 @@  i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	return &p->requests[idx];
 }
 
+struct sched_cache {
+	struct list_head *priolist;
+};
+
 static struct intel_engine_cs *
-sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
+sched_lock_engine(const struct i915_sched_node *node,
+		  struct intel_engine_cs *locked,
+		  struct sched_cache *cache)
 {
 	struct intel_engine_cs *engine = node_to_request(node)->engine;
 
@@ -232,6 +238,7 @@  sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 
 	if (engine != locked) {
 		spin_unlock(&locked->timeline.lock);
+		memset(cache, 0, sizeof(*cache));
 		spin_lock(&engine->timeline.lock);
 	}
 
@@ -253,11 +260,11 @@  static bool inflight(const struct i915_request *rq,
 static void __i915_schedule(struct i915_request *rq,
 			    const struct i915_sched_attr *attr)
 {
-	struct list_head *uninitialized_var(pl);
-	struct intel_engine_cs *engine, *last;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	const int prio = attr->priority;
+	struct sched_cache cache;
 	LIST_HEAD(dfs);
 
 	/* Needed in order to use the temporary link inside i915_dependency */
@@ -328,7 +335,7 @@  static void __i915_schedule(struct i915_request *rq,
 		__list_del_entry(&stack.dfs_link);
 	}
 
-	last = NULL;
+	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
 	spin_lock_irq(&engine->timeline.lock);
 
@@ -338,7 +345,7 @@  static void __i915_schedule(struct i915_request *rq,
 
 		INIT_LIST_HEAD(&dep->dfs_link);
 
-		engine = sched_lock_engine(node, engine);
+		engine = sched_lock_engine(node, engine, &cache);
 		lockdep_assert_held(&engine->timeline.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
@@ -347,11 +354,11 @@  static void __i915_schedule(struct i915_request *rq,
 
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
-			if (last != engine) {
-				pl = i915_sched_lookup_priolist(engine, prio);
-				last = engine;
-			}
-			list_move_tail(&node->link, pl);
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		} else {
 			/*
 			 * If the request is not in the priolist queue because