
[CI,1/4] drm/i915/gt: Mark the execlists->active as the primary volatile access

Message ID 20191124170524.1436498-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [CI,1/4] drm/i915/gt: Mark the execlists->active as the primary volatile access

Commit Message

Chris Wilson Nov. 24, 2019, 5:05 p.m. UTC
Since we want to do a lockless read of the current active request, and
that request is written to by process_csb also without serialisation, we
need to instruct gcc to take care in reading the pointer itself.

Otherwise, we have observed execlists_active() to report 0x40.

[ 2400.760381] igt/para-4098    1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4
[ 2400.760826] igt/para-4098    1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
[ 2400.761271] igt/para-4098    1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 }
[ 2400.761726] igt/para-4097    0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940

which is impossible!

The answer is that as we keep the existing execlists->active pointing
into the array as we copy over that array, the unserialised read may see
a partial pointer value.

Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine.h |  4 +---
 drivers/gpu/drm/i915/gt/intel_lrc.c    | 24 ++++++++++++++----------
 2 files changed, 15 insertions(+), 13 deletions(-)
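
A minimal sketch of the publish/consume pattern the patch moves to, with
generic names rather than the driver's (only the READ_ONCE/WRITE_ONCE
pairing mirrors the diff below; the rest is illustrative):

/* Illustrative only: lockless reader/writer pair where the pointer
 * itself, not just the dereference, is the volatile access.
 */
#include <linux/compiler.h>	/* READ_ONCE, WRITE_ONCE */
#include <linux/string.h>	/* memcpy */

struct item;

struct sketch {
	struct item *inflight[2];	/* currently executing */
	struct item *pending[2];	/* next submission */
	struct item * const *active;	/* published cursor for lockless readers */
};

/* Reader: sample the pointer exactly once, then dereference it. */
static struct item *sketch_active(const struct sketch *s)
{
	return *READ_ONCE(s->active);
}

/* Writer: never leave active pointing into an array that is being
 * rewritten; park it on the stable pending[] first, do the copy, then
 * publish the refreshed inflight[].
 */
static void sketch_promote(struct sketch *s)
{
	WRITE_ONCE(s->active, s->pending);
	memcpy(s->inflight, s->pending, sizeof(s->inflight));
	WRITE_ONCE(s->active, s->inflight);
	WRITE_ONCE(s->pending[0], NULL);	/* ready for the next dequeue */
}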

Comments

Mika Kuoppala Nov. 25, 2019, 9:16 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we want to do a lockless read of the current active request, and
> that request is written to by process_csb also without serialisation, we
> need to instruct gcc to take care in reading the pointer itself.
>
> Otherwise, we have observed execlists_active() to report 0x40.
>
> [ 2400.760381] igt/para-4098    1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4
> [ 2400.760826] igt/para-4098    1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
> [ 2400.761271] igt/para-4098    1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 }
> [ 2400.761726] igt/para-4097    0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940

Where is this exact tracepoint? My grep skills are failing me.

>
> which is impossible!
>
> The answer is that as we keep the existing execlists->active pointing
> into the array as we copy over that array, the unserialised read may see
> a partial pointer value.

...otherwise we will see ?

Also, the 0x40 is bothering me as I didn't find the tracepoint. If we
only displayed pointer values, where did the offset appear?

>
> Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h |  4 +---
>  drivers/gpu/drm/i915/gt/intel_lrc.c    | 24 ++++++++++++++----------
>  2 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index bc3b72bfa9e3..01765a7ec18f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
>  static inline struct i915_request *
>  execlists_active(const struct intel_engine_execlists *execlists)
>  {
> -	GEM_BUG_ON(execlists->active - execlists->inflight >
> -		   execlists_num_ports(execlists));
> -	return READ_ONCE(*execlists->active);
> +	return *READ_ONCE(execlists->active);

Yes, this seems proper, as we need an a priori read of the pointer before dereferencing it.

>  }
>  
>  static inline void
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0e2065a13f24..0d0dca3d6724 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2169,23 +2169,27 @@ static void process_csb(struct intel_engine_cs *engine)
>  		else
>  			promote = gen8_csb_parse(execlists, buf + 2 * head);
>  		if (promote) {
> +			struct i915_request * const *old = execlists->active;
> +
> +			/* Point active to the new ELSP; prevent overwriting */
> +			WRITE_ONCE(execlists->active, execlists->pending);
> +			set_timeslice(engine);

If we set the active to pending here...

> +
>  			if (!inject_preempt_hang(execlists))
>  				ring_set_paused(engine, 0);
>  
>  			/* cancel old inflight, prepare for switch */
> -			trace_ports(execlists, "preempted", execlists->active);
> -			while (*execlists->active)
> -				execlists_schedule_out(*execlists->active++);
> +			trace_ports(execlists, "preempted", old);
> +			while (*old)
> +				execlists_schedule_out(*old++);
>  
>  			/* switch pending to inflight */
>  			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
> -			execlists->active =
> -				memcpy(execlists->inflight,
> -				       execlists->pending,
> -				       execlists_num_ports(execlists) *
> -				       sizeof(*execlists->pending));
> -
> -			set_timeslice(engine);
> +			WRITE_ONCE(execlists->active,
> +				   memcpy(execlists->inflight,
> +					  execlists->pending,
> +					  execlists_num_ports(execlists) *
> +					  sizeof(*execlists->pending)));

Why do we rewrite it here? Is the pending moving beneath us?

-Mika

>  
>  			WRITE_ONCE(execlists->pending[0], NULL);
>  		} else {
> -- 
> 2.24.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Nov. 25, 2019, 9:23 a.m. UTC | #2
Quoting Mika Kuoppala (2019-11-25 09:16:30)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since we want to do a lockless read of the current active request, and
> > that request is written to by process_csb also without serialisation, we
> > need to instruct gcc to take care in reading the pointer itself.
> >
> > Otherwise, we have observed execlists_active() to report 0x40.
> >
> > [ 2400.760381] igt/para-4098    1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4
> > [ 2400.760826] igt/para-4098    1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
> > [ 2400.761271] igt/para-4098    1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 }
> > [ 2400.761726] igt/para-4097    0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940
> 
> Where is this exact tracepoint? My grep skills are failing me.

I added it to see:
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7388/fi-bsw-n3050/igt@i915_selftest@live_gem_contexts.html

> >
> > which is impossible!
> >
> > The answer is that as we keep the existing execlists->active pointing
> > into the array as we copy over that array, the unserialised read may see
> > a partial pointer value.
> 
> ...otherwise we will see ?
> 
> Also, the 0x40 is bothering me as I didn't find the tracepoint. If we
> only displayed pointer values, where did the offset appear?

Because we did a byte-by-byte copy of pending to inflight while
execlists_active() was reading *active [pointing into inflight].

So inflight is a random mix of NULL + rq, starting at the LSB.
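
To make the 0x40 concrete: on a little-endian machine, a reader that
catches the copy after only the low byte of the new pointer has landed
sees exactly that kind of value. A trivial user-space illustration (not
driver code; the pointer value is simply the one from the trace above):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	void *slot = NULL;	/* old inflight[] entry being overwritten */
	void *rq = (void *)(uintptr_t)0xffff888208c1e940ULL;

	/* Force a one-byte copy to mimic what a reader racing against a
	 * low-byte-first memcpy() can observe part way through.
	 */
	memcpy(&slot, &rq, 1);

	printf("%p\n", slot);	/* prints 0x40 on little-endian */
	return 0;
}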

> > Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine.h |  4 +---
> >  drivers/gpu/drm/i915/gt/intel_lrc.c    | 24 ++++++++++++++----------
> >  2 files changed, 15 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > index bc3b72bfa9e3..01765a7ec18f 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
> >  static inline struct i915_request *
> >  execlists_active(const struct intel_engine_execlists *execlists)
> >  {
> > -     GEM_BUG_ON(execlists->active - execlists->inflight >
> > -                execlists_num_ports(execlists));
> > -     return READ_ONCE(*execlists->active);
> > +     return *READ_ONCE(execlists->active);
> 
> Yes, this seems proper, as we need an a priori read of the pointer before dereferencing it.
> 
> >  }
> >  
> >  static inline void
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 0e2065a13f24..0d0dca3d6724 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2169,23 +2169,27 @@ static void process_csb(struct intel_engine_cs *engine)
> >               else
> >                       promote = gen8_csb_parse(execlists, buf + 2 * head);
> >               if (promote) {
> > +                     struct i915_request * const *old = execlists->active;
> > +
> > +                     /* Point active to the new ELSP; prevent overwriting */
> > +                     WRITE_ONCE(execlists->active, execlists->pending);
> > +                     set_timeslice(engine);
> 
> If we set the active to pending here...
> 
> > +
> >                       if (!inject_preempt_hang(execlists))
> >                               ring_set_paused(engine, 0);
> >  
> >                       /* cancel old inflight, prepare for switch */
> > -                     trace_ports(execlists, "preempted", execlists->active);
> > -                     while (*execlists->active)
> > -                             execlists_schedule_out(*execlists->active++);
> > +                     trace_ports(execlists, "preempted", old);
> > +                     while (*old)
> > +                             execlists_schedule_out(*old++);
> >  
> >                       /* switch pending to inflight */
> >                       GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
> > -                     execlists->active =
> > -                             memcpy(execlists->inflight,
> > -                                    execlists->pending,
> > -                                    execlists_num_ports(execlists) *
> > -                                    sizeof(*execlists->pending));
> > -
> > -                     set_timeslice(engine);
> > +                     WRITE_ONCE(execlists->active,
> > +                                memcpy(execlists->inflight,
> > +                                       execlists->pending,
> > +                                       execlists_num_ports(execlists) *
> > +                                       sizeof(*execlists->pending)));
> 
> Why do we rewrite it here? Is the pending moving beneath us?

Yes. Pending is where we track the next submit, inflight + active the
current. pending[0] = NULL is the next line, and pending[] is then set
in dequeue.
-Chris
Mika Kuoppala Nov. 25, 2019, 9:38 a.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-11-25 09:16:30)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Since we want to do a lockless read of the current active request, and
>> > that request is written to by process_csb also without serialisation, we
>> > need to instruct gcc to take care in reading the pointer itself.
>> >
>> > Otherwise, we have observed execlists_active() to report 0x40.
>> >
>> > [ 2400.760381] igt/para-4098    1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4
>> > [ 2400.760826] igt/para-4098    1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000
>> > [ 2400.761271] igt/para-4098    1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 }
>> > [ 2400.761726] igt/para-4097    0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940
>> 
>> Where is this exact tracepoint? My grep skills are failing me.
>
> I added it to see:
> https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7388/fi-bsw-n3050/igt@i915_selftest@live_gem_contexts.html
>
>> >
>> > which is impossible!
>> >
>> > The answer is that as we keep the existing execlists->active pointing
>> > into the array as we copy over that array, the unserialised read may see
>> > a partial pointer value.
>> 
>> ...otherwise we will see ?
>> 
>> Also, the 0x40 is bothering me as I didn't find the tracepoint. If we
>> only displayed pointer values, where did the offset appear?
>
> Because we did a byte-by-byte copy of pending to inflight while
> execlists_active() was reading *active [pointing into inflight].
>
> So inflight is a random mix of NULL + rq, starting at the LSB.

Seems so; yeah, we can't really assume memcpy would do anything
fancier.

Ok, put a WRITE_ONCE around the update of active in
cancel_port_requests() too, for symmetry.
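
Something along these lines; a hypothetical sketch only, since
cancel_port_requests() itself is not quoted in this thread and its body
here is assumed:

/* Hypothetical sketch -- the function body is assumed, not taken from
 * this patch. The point is the symmetric WRITE_ONCE() whenever
 * execlists->active is repointed.
 */
static void cancel_port_requests(struct intel_engine_execlists * const execlists)
{
	struct i915_request * const *port;

	for (port = execlists->pending; *port; port++)
		execlists_schedule_out(*port);
	memset(execlists->pending, 0, sizeof(execlists->pending));

	for (port = execlists->active; *port; port++)
		execlists_schedule_out(*port);

	WRITE_ONCE(execlists->active,
		   memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}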

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>
>> > Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock")
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/gt/intel_engine.h |  4 +---
>> >  drivers/gpu/drm/i915/gt/intel_lrc.c    | 24 ++++++++++++++----------
>> >  2 files changed, 15 insertions(+), 13 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
>> > index bc3b72bfa9e3..01765a7ec18f 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
>> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>> > @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
>> >  static inline struct i915_request *
>> >  execlists_active(const struct intel_engine_execlists *execlists)
>> >  {
>> > -     GEM_BUG_ON(execlists->active - execlists->inflight >
>> > -                execlists_num_ports(execlists));
>> > -     return READ_ONCE(*execlists->active);
>> > +     return *READ_ONCE(execlists->active);
>> 
>> Yes, this seems proper, as we need an a priori read of the pointer before dereferencing it.
>> 
>> >  }
>> >  
>> >  static inline void
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > index 0e2065a13f24..0d0dca3d6724 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > @@ -2169,23 +2169,27 @@ static void process_csb(struct intel_engine_cs *engine)
>> >               else
>> >                       promote = gen8_csb_parse(execlists, buf + 2 * head);
>> >               if (promote) {
>> > +                     struct i915_request * const *old = execlists->active;
>> > +
>> > +                     /* Point active to the new ELSP; prevent overwriting */
>> > +                     WRITE_ONCE(execlists->active, execlists->pending);
>> > +                     set_timeslice(engine);
>> 
>> If we set the active to pending here...
>> 
>> > +
>> >                       if (!inject_preempt_hang(execlists))
>> >                               ring_set_paused(engine, 0);
>> >  
>> >                       /* cancel old inflight, prepare for switch */
>> > -                     trace_ports(execlists, "preempted", execlists->active);
>> > -                     while (*execlists->active)
>> > -                             execlists_schedule_out(*execlists->active++);
>> > +                     trace_ports(execlists, "preempted", old);
>> > +                     while (*old)
>> > +                             execlists_schedule_out(*old++);
>> >  
>> >                       /* switch pending to inflight */
>> >                       GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
>> > -                     execlists->active =
>> > -                             memcpy(execlists->inflight,
>> > -                                    execlists->pending,
>> > -                                    execlists_num_ports(execlists) *
>> > -                                    sizeof(*execlists->pending));
>> > -
>> > -                     set_timeslice(engine);
>> > +                     WRITE_ONCE(execlists->active,
>> > +                                memcpy(execlists->inflight,
>> > +                                       execlists->pending,
>> > +                                       execlists_num_ports(execlists) *
>> > +                                       sizeof(*execlists->pending)));
>> 
>> Why do we rewrite it here? Is the pending moving beneath us?
>
> Yes. Pending is where we track the next submit, inflight + active the
> current. pending[0] = NULL is the next line, and pending[] is then set
> in dequeue.
> -Chris

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index bc3b72bfa9e3..01765a7ec18f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -100,9 +100,7 @@  execlists_num_ports(const struct intel_engine_execlists * const execlists)
 static inline struct i915_request *
 execlists_active(const struct intel_engine_execlists *execlists)
 {
-	GEM_BUG_ON(execlists->active - execlists->inflight >
-		   execlists_num_ports(execlists));
-	return READ_ONCE(*execlists->active);
+	return *READ_ONCE(execlists->active);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0e2065a13f24..0d0dca3d6724 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2169,23 +2169,27 @@  static void process_csb(struct intel_engine_cs *engine)
 		else
 			promote = gen8_csb_parse(execlists, buf + 2 * head);
 		if (promote) {
+			struct i915_request * const *old = execlists->active;
+
+			/* Point active to the new ELSP; prevent overwriting */
+			WRITE_ONCE(execlists->active, execlists->pending);
+			set_timeslice(engine);
+
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
 
 			/* cancel old inflight, prepare for switch */
-			trace_ports(execlists, "preempted", execlists->active);
-			while (*execlists->active)
-				execlists_schedule_out(*execlists->active++);
+			trace_ports(execlists, "preempted", old);
+			while (*old)
+				execlists_schedule_out(*old++);
 
 			/* switch pending to inflight */
 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-			execlists->active =
-				memcpy(execlists->inflight,
-				       execlists->pending,
-				       execlists_num_ports(execlists) *
-				       sizeof(*execlists->pending));
-
-			set_timeslice(engine);
+			WRITE_ONCE(execlists->active,
+				   memcpy(execlists->inflight,
+					  execlists->pending,
+					  execlists_num_ports(execlists) *
+					  sizeof(*execlists->pending)));
 
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {