diff mbox

[01/71] drm/i915/execlists: Drop preemption arbitrations points along the ring

Message ID 20180503063757.22238-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 3, 2018, 6:36 a.m. UTC
Limit the arbitration (where preemption may occur) to inside the batch,
and prevent it from happening on the pipecontrols/flushes we use to
write the breadcrumb seqno. Once the user batch is complete, we have
nothing left to do but serialise and emit the breadcrumb; switching
contexts at this point is futile so don't.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Lionel Landwerlin May 3, 2018, 10:13 a.m. UTC | #1
On 03/05/18 07:36, Chris Wilson wrote:
> Limit the arbitration (where preemption may occur) to inside the batch,
> and prevent it from happening on the pipecontrols/flushes we use to
> write the breadcrumb seqno. Once the user batch is complete, we have
> nothing left to do but serialise and emit the breadcrumb; switching
> contexts at this point is futile so don't.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
>   1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e04798e98db2..70b722c36e65 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1934,7 +1934,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
>   	}
>   
> -	cs = intel_ring_begin(rq, 4);
> +	cs = intel_ring_begin(rq, 6);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> @@ -1963,6 +1963,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
>   	*cs++ = lower_32_bits(offset);
>   	*cs++ = upper_32_bits(offset);
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +	*cs++ = MI_NOOP;
>   	intel_ring_advance(rq, cs);
>   
>   	return 0;
> @@ -2105,7 +2108,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>   	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
>   				  intel_hws_seqno_address(request->engine));
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>   	request->tail = intel_ring_offset(request, cs);
>   	assert_ring_tail_valid(request->ring, request->tail);
>   
> @@ -2121,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
>   				      intel_hws_seqno_address(request->engine));
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>   	request->tail = intel_ring_offset(request, cs);
>   	assert_ring_tail_valid(request->ring, request->tail);
>   

Looks good to me. Just one question, you're adding a NOOP in one place, 
dropping it in the other 2. What the rational?
Chris Wilson May 3, 2018, 10:18 a.m. UTC | #2
Quoting Lionel Landwerlin (2018-05-03 11:13:05)
> On 03/05/18 07:36, Chris Wilson wrote:
> > Limit the arbitration (where preemption may occur) to inside the batch,
> > and prevent it from happening on the pipecontrols/flushes we use to
> > write the breadcrumb seqno. Once the user batch is complete, we have
> > nothing left to do but serialise and emit the breadcrumb; switching
> > contexts at this point is futile so don't.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
> >   1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index e04798e98db2..70b722c36e65 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -1934,7 +1934,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> >               rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
> >       }
> >   
> > -     cs = intel_ring_begin(rq, 4);
> > +     cs = intel_ring_begin(rq, 6);
> >       if (IS_ERR(cs))
> >               return PTR_ERR(cs);
> >   
> > @@ -1963,6 +1963,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> >               (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
> >       *cs++ = lower_32_bits(offset);
> >       *cs++ = upper_32_bits(offset);
> > +
> > +     *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> > +     *cs++ = MI_NOOP;
> >       intel_ring_advance(rq, cs);
> >   
> >       return 0;
> > @@ -2105,7 +2108,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
> >       cs = gen8_emit_ggtt_write(cs, request->global_seqno,
> >                                 intel_hws_seqno_address(request->engine));
> >       *cs++ = MI_USER_INTERRUPT;
> > -     *cs++ = MI_NOOP;
> > +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> >       request->tail = intel_ring_offset(request, cs);
> >       assert_ring_tail_valid(request->ring, request->tail);
> >   
> > @@ -2121,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> >       cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
> >                                     intel_hws_seqno_address(request->engine));
> >       *cs++ = MI_USER_INTERRUPT;
> > -     *cs++ = MI_NOOP;
> > +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> >       request->tail = intel_ring_offset(request, cs);
> >       assert_ring_tail_valid(request->ring, request->tail);
> >   
> 
> Looks good to me. Just one question, you're adding a NOOP in one place, 
> dropping it in the other 2. What the rational?

Writes into the ring have to be in multiples of 2 dwords. It doesn't
strictly, as only the RING_TAIL has to be qword aligned and we could fix
up the odd dword at the end, but for now the code insists on every
packet being an even number of dwords, hence padding with NOPs.
-Chris
Lionel Landwerlin May 3, 2018, 10:28 a.m. UTC | #3
On 03/05/18 11:18, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2018-05-03 11:13:05)
>> On 03/05/18 07:36, Chris Wilson wrote:
>>> Limit the arbitration (where preemption may occur) to inside the batch,
>>> and prevent it from happening on the pipecontrols/flushes we use to
>>> write the breadcrumb seqno. Once the user batch is complete, we have
>>> nothing left to do but serialise and emit the breadcrumb; switching
>>> contexts at this point is futile so don't.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Michał Winiarski <michal.winiarski@intel.com>
>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
>>>    1 file changed, 6 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index e04798e98db2..70b722c36e65 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -1934,7 +1934,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>>>                rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
>>>        }
>>>    
>>> -     cs = intel_ring_begin(rq, 4);
>>> +     cs = intel_ring_begin(rq, 6);
>>>        if (IS_ERR(cs))
>>>                return PTR_ERR(cs);
>>>    
>>> @@ -1963,6 +1963,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>>>                (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
>>>        *cs++ = lower_32_bits(offset);
>>>        *cs++ = upper_32_bits(offset);
>>> +
>>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
>>> +     *cs++ = MI_NOOP;
>>>        intel_ring_advance(rq, cs);
>>>    
>>>        return 0;
>>> @@ -2105,7 +2108,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>>>        cs = gen8_emit_ggtt_write(cs, request->global_seqno,
>>>                                  intel_hws_seqno_address(request->engine));
>>>        *cs++ = MI_USER_INTERRUPT;
>>> -     *cs++ = MI_NOOP;
>>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>>>        request->tail = intel_ring_offset(request, cs);
>>>        assert_ring_tail_valid(request->ring, request->tail);
>>>    
>>> @@ -2121,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>>>        cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
>>>                                      intel_hws_seqno_address(request->engine));
>>>        *cs++ = MI_USER_INTERRUPT;
>>> -     *cs++ = MI_NOOP;
>>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>>>        request->tail = intel_ring_offset(request, cs);
>>>        assert_ring_tail_valid(request->ring, request->tail);
>>>    
>> Looks good to me. Just one question, you're adding a NOOP in one place,
>> dropping it in the other 2. What the rational?
> Writes into the ring have to be in multiples of 2 dwords. It doesn't
> strictly, as only the RING_TAIL has to be qword aligned and we could fix
> up the odd dword at the end, but for now the code insists on every
> packet being an even number of dwords, hence padding with NOPs.
> -Chris
>
Thanks,

Would it make sense to add a GEM_BUG_ON() in intel_ring_advance() maybe?

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Chris Wilson May 3, 2018, 10:38 a.m. UTC | #4
Quoting Lionel Landwerlin (2018-05-03 11:28:42)
> On 03/05/18 11:18, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2018-05-03 11:13:05)
> >> On 03/05/18 07:36, Chris Wilson wrote:
> >>> Limit the arbitration (where preemption may occur) to inside the batch,
> >>> and prevent it from happening on the pipecontrols/flushes we use to
> >>> write the breadcrumb seqno. Once the user batch is complete, we have
> >>> nothing left to do but serialise and emit the breadcrumb; switching
> >>> contexts at this point is futile so don't.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Michał Winiarski <michal.winiarski@intel.com>
> >>> Cc: Michel Thierry <michel.thierry@intel.com>
> >>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
> >>>    1 file changed, 6 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >>> index e04798e98db2..70b722c36e65 100644
> >>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>> @@ -1934,7 +1934,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> >>>                rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
> >>>        }
> >>>    
> >>> -     cs = intel_ring_begin(rq, 4);
> >>> +     cs = intel_ring_begin(rq, 6);
> >>>        if (IS_ERR(cs))
> >>>                return PTR_ERR(cs);
> >>>    
> >>> @@ -1963,6 +1963,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> >>>                (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
> >>>        *cs++ = lower_32_bits(offset);
> >>>        *cs++ = upper_32_bits(offset);
> >>> +
> >>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> >>> +     *cs++ = MI_NOOP;
> >>>        intel_ring_advance(rq, cs);
> >>>    
> >>>        return 0;
> >>> @@ -2105,7 +2108,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
> >>>        cs = gen8_emit_ggtt_write(cs, request->global_seqno,
> >>>                                  intel_hws_seqno_address(request->engine));
> >>>        *cs++ = MI_USER_INTERRUPT;
> >>> -     *cs++ = MI_NOOP;
> >>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> >>>        request->tail = intel_ring_offset(request, cs);
> >>>        assert_ring_tail_valid(request->ring, request->tail);
> >>>    
> >>> @@ -2121,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> >>>        cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
> >>>                                      intel_hws_seqno_address(request->engine));
> >>>        *cs++ = MI_USER_INTERRUPT;
> >>> -     *cs++ = MI_NOOP;
> >>> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> >>>        request->tail = intel_ring_offset(request, cs);
> >>>        assert_ring_tail_valid(request->ring, request->tail);
> >>>    
> >> Looks good to me. Just one question, you're adding a NOOP in one place,
> >> dropping it in the other 2. What the rational?
> > Writes into the ring have to be in multiples of 2 dwords. It doesn't
> > strictly, as only the RING_TAIL has to be qword aligned and we could fix
> > up the odd dword at the end, but for now the code insists on every
> > packet being an even number of dwords, hence padding with NOPs.
> > -Chris
> >
> Thanks,
> 
> Would it make sense to add a GEM_BUG_ON() in intel_ring_advance() maybe?

It's in intel_ring_begin(). intel_ring_advance() just makes sure you
wrote exactly the number of dwords you said you would.

What we can do it preallocate that extra padding in intel_ring_begin()
and to the alignment at intel_ring_get_tail(). It's a bit of a fiddle
for the odd dword saved, but easy enough and we should lose any of our
sanity checks.
-Chris
Tvrtko Ursulin May 3, 2018, 4:37 p.m. UTC | #5
On 03/05/2018 07:36, Chris Wilson wrote:
> Limit the arbitration (where preemption may occur) to inside the batch,
> and prevent it from happening on the pipecontrols/flushes we use to
> write the breadcrumb seqno. Once the user batch is complete, we have
> nothing left to do but serialise and emit the breadcrumb; switching
> contexts at this point is futile so don't.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++---
>   1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e04798e98db2..70b722c36e65 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1934,7 +1934,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
>   	}
>   
> -	cs = intel_ring_begin(rq, 4);
> +	cs = intel_ring_begin(rq, 6);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> @@ -1963,6 +1963,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
>   		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
>   	*cs++ = lower_32_bits(offset);
>   	*cs++ = upper_32_bits(offset);
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +	*cs++ = MI_NOOP;
>   	intel_ring_advance(rq, cs);
>   
>   	return 0;
> @@ -2105,7 +2108,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>   	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
>   				  intel_hws_seqno_address(request->engine));
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>   	request->tail = intel_ring_offset(request, cs);
>   	assert_ring_tail_valid(request->ring, request->tail);
>   
> @@ -2121,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
>   				      intel_hws_seqno_address(request->engine));
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>   	request->tail = intel_ring_offset(request, cs);
>   	assert_ring_tail_valid(request->ring, request->tail);
>   
> 

Sounds to me like a completely sensible thing to do.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e04798e98db2..70b722c36e65 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1934,7 +1934,7 @@  static int gen8_emit_bb_start(struct i915_request *rq,
 		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
-	cs = intel_ring_begin(rq, 4);
+	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -1963,6 +1963,9 @@  static int gen8_emit_bb_start(struct i915_request *rq,
 		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
 	intel_ring_advance(rq, cs);
 
 	return 0;
@@ -2105,7 +2108,7 @@  static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
@@ -2121,7 +2124,7 @@  static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
 				      intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);