
[13/37] drm/i915: Priority boost switching to an idle ring

Message ID 20180629075348.27358-13-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 29, 2018, 7:53 a.m. UTC
In order to maximise concurrency between engines, if we queue a request
to a currently idle ring, reorder its dependencies to execute that request
as early as possible and ideally improve occupancy of multiple engines
simultaneously.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
 drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)
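
For reference, this patch packs the user-visible priority into the upper
bits of the effective priority and reserves the two bits below
I915_USER_PRIORITY_SHIFT for internal boosts. A minimal sketch of the
resulting encoding (an illustration only; effective_priority() is a
hypothetical helper, not part of the patch):

	#include <stdbool.h>

	/* Values as defined by this patch. */
	#define I915_USER_PRIORITY_SHIFT 2
	#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)

	#define I915_PRIORITY_NEWCLIENT (1 << 1) /* first request of a new flow */
	#define I915_PRIORITY_STALL     (1 << 0) /* queued to an idle engine */

	/* Hypothetical helper: compose an effective priority from its parts. */
	static int effective_priority(int user_prio, bool newclient, bool stall)
	{
		int prio = I915_USER_PRIORITY(user_prio);

		if (newclient)
			prio |= I915_PRIORITY_NEWCLIENT;
		if (stall)
			prio |= I915_PRIORITY_STALL;

		return prio;
	}

Because both boost bits sit below I915_USER_PRIORITY_SHIFT, a request at
user priority N+1 still sorts above any boosted request at user priority N;
the boosts only reorder requests within the same user priority level.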

Comments

Tvrtko Ursulin June 29, 2018, 10:08 a.m. UTC | #1
On 29/06/2018 08:53, Chris Wilson wrote:
> In order to maximise concurrency between engines, if we queue a request
> to a currently idle ring, reorder its dependencies to execute that request
> as early as possible and ideally improve occupancy of multiple engines
> simultaneously.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>   drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>   3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2d7a785dd88c..d618e7127e88 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>   	local_bh_disable();
>   	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>   	if (engine->schedule) {
> -		struct i915_sched_attr attr = request->gem_context->sched;
> +		struct i915_gem_context *ctx = request->gem_context;
> +		struct i915_sched_attr attr = ctx->sched;
>   
>   		/*
>   		 * Boost priorities to new clients (new request flows).
> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>   		if (!prev || i915_request_completed(prev))
>   			attr.priority |= I915_PRIORITY_NEWCLIENT;
>   
> +		if (intel_engine_queue_is_empty(engine))
> +			attr.priority |= I915_PRIORITY_STALL;
> +
>   		engine->schedule(request, &attr);
>   	}
>   	rcu_read_unlock();
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index e9fb6c1d5e42..be132ceb83d9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,13 +19,14 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 2
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> -#define I915_PRIORITY_NEWCLIENT BIT(0)
> +#define I915_PRIORITY_NEWCLIENT BIT(1)
> +#define I915_PRIORITY_STALL BIT(0)

So lower priority than new clients.

Again I have big concerns.

A client which happens to be the only one using some exotic engine would
now be able to trash everything else running on the system, just because
it so happens that no one else uses its favourite engine. And that's
regardless of how much work it has queued up on other, potentially busy,
engines.

With this and the previous patch, I'd say: hello, slippery slope of
scheduler tuning! :))

Regards,

Tvrtko

>   
>   struct i915_sched_attr {
>   	/**
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index b9ea5cee249f..b26587d0c70d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -1080,6 +1080,12 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
>   
>   void intel_engines_sanitize(struct drm_i915_private *i915);
>   
> +static inline bool
> +intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
> +{
> +	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
> +}
> +
>   bool intel_engine_is_idle(struct intel_engine_cs *engine);
>   bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>   
>
Chris Wilson June 29, 2018, 10:15 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > In order to maximise concurrency between engines, if we queue a request
> > to a currently idle ring, reorder its dependencies to execute that request
> > as early as possible and ideally improve occupancy of multiple engines
> > simultaneously.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
> >   drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
> >   drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
> >   3 files changed, 14 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 2d7a785dd88c..d618e7127e88 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
> >       local_bh_disable();
> >       rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> >       if (engine->schedule) {
> > -             struct i915_sched_attr attr = request->gem_context->sched;
> > +             struct i915_gem_context *ctx = request->gem_context;
> > +             struct i915_sched_attr attr = ctx->sched;
> >   
> >               /*
> >                * Boost priorities to new clients (new request flows).
> > @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
> >               if (!prev || i915_request_completed(prev))
> >                       attr.priority |= I915_PRIORITY_NEWCLIENT;
> >   
> > +             if (intel_engine_queue_is_empty(engine))
> > +                     attr.priority |= I915_PRIORITY_STALL;
> > +
> >               engine->schedule(request, &attr);
> >       }
> >       rcu_read_unlock();
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > index e9fb6c1d5e42..be132ceb83d9 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > @@ -19,13 +19,14 @@ enum {
> >       I915_PRIORITY_INVALID = INT_MIN
> >   };
> >   
> > -#define I915_USER_PRIORITY_SHIFT 1
> > +#define I915_USER_PRIORITY_SHIFT 2
> >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >   
> >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >   
> > -#define I915_PRIORITY_NEWCLIENT BIT(0)
> > +#define I915_PRIORITY_NEWCLIENT BIT(1)
> > +#define I915_PRIORITY_STALL BIT(0)
> 
> So lower priority than new clients.
> 
> Again I have big concerns.
> 
> A client which happens to be the only one using some exotic engine would
> now be able to trash everything else running on the system, just because
> it so happens that no one else uses its favourite engine. And that's
> regardless of how much work it has queued up on other, potentially busy,
> engines.

It stays within the same priority level: below the other boosts, but just
above steady state. Because of that, the starvation issue here is no worse
than it is currently.
-Chris
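
For concreteness, the values in the patch give the following effective
priorities at user priority 0:

	user priority 0, no boost:            0
	user priority 0, STALL:               1
	user priority 0, NEWCLIENT:           2
	user priority 0, NEWCLIENT | STALL:   3
	user priority 1, no boost:            4

so a STALL-boosted request sorts below a NEWCLIENT-boosted one, and any
request at a higher user priority still beats both.
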
Tvrtko Ursulin June 29, 2018, 10:41 a.m. UTC | #3
On 29/06/2018 11:15, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
>>
>> On 29/06/2018 08:53, Chris Wilson wrote:
>>> In order to maximise concurrency between engines, if we queue a request
>>> to a currently idle ring, reorder its dependencies to execute that request
>>> as early as possible and ideally improve occupancy of multiple engines
>>> simultaneously.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>>>    drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>>>    3 files changed, 14 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 2d7a785dd88c..d618e7127e88 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>>>        local_bh_disable();
>>>        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>>>        if (engine->schedule) {
>>> -             struct i915_sched_attr attr = request->gem_context->sched;
>>> +             struct i915_gem_context *ctx = request->gem_context;
>>> +             struct i915_sched_attr attr = ctx->sched;
>>>    
>>>                /*
>>>                 * Boost priorities to new clients (new request flows).
>>> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>>>                if (!prev || i915_request_completed(prev))
>>>                        attr.priority |= I915_PRIORITY_NEWCLIENT;
>>>    
>>> +             if (intel_engine_queue_is_empty(engine))
>>> +                     attr.priority |= I915_PRIORITY_STALL;
>>> +
>>>                engine->schedule(request, &attr);
>>>        }
>>>        rcu_read_unlock();
>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>> index e9fb6c1d5e42..be132ceb83d9 100644
>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>> @@ -19,13 +19,14 @@ enum {
>>>        I915_PRIORITY_INVALID = INT_MIN
>>>    };
>>>    
>>> -#define I915_USER_PRIORITY_SHIFT 1
>>> +#define I915_USER_PRIORITY_SHIFT 2
>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>    
>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>    
>>> -#define I915_PRIORITY_NEWCLIENT BIT(0)
>>> +#define I915_PRIORITY_NEWCLIENT BIT(1)
>>> +#define I915_PRIORITY_STALL BIT(0)
>>
>> So lower priority than new clients.
>>
>> Again I have big concerns.
>>
>> A client which happens to be the only one using some exotic engine would
>> now be able to trash everything else running on the system, just because
>> it so happens that no one else uses its favourite engine. And that's
>> regardless of how much work it has queued up on other, potentially busy,
>> engines.
> 
> Within the same priority level, below others, but just above steady
> state. Because of that the starvation issue here is no worse than at
> current.

I don't follow. Currently, a client which is the only one submitting to
an engine won't cause preemption on other engines. With this patch it
would, so it opens up a new set of unfairness issues. Please correct me
if I have interpreted the commit message and code incorrectly.

Regards,

Tvrtko
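
The mechanism at issue in this exchange is the priority propagation
performed by engine->schedule(): per the commit message, raising one
request's priority also raises the incomplete requests it depends on,
including requests queued to other, possibly busy, engines. A simplified,
self-contained sketch of that behaviour (struct request and
boost_dependencies() here are hypothetical stand-ins, not the actual i915
scheduler code):

	#include <stdbool.h>

	/* Hypothetical, greatly simplified request; real i915 requests
	 * carry much more state. */
	struct request {
		int priority;
		bool completed;
		struct request **deps;	/* requests this one waits on */
		int num_deps;
	};

	/* Sketch of the dependency boost: bumping one request bumps its
	 * whole dependency chain, wherever those requests are queued.
	 * This cross-engine effect is what the review discussion above
	 * is about. */
	static void boost_dependencies(struct request *rq, int prio)
	{
		if (rq->completed || rq->priority >= prio)
			return;

		rq->priority = prio;

		for (int i = 0; i < rq->num_deps; i++)
			boost_dependencies(rq->deps[i], prio);
	}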

Patch

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2d7a785dd88c..d618e7127e88 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,7 +1053,8 @@  void i915_request_add(struct i915_request *request)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_gem_context *ctx = request->gem_context;
+		struct i915_sched_attr attr = ctx->sched;
 
 		/*
 		 * Boost priorities to new clients (new request flows).
@@ -1064,6 +1065,9 @@  void i915_request_add(struct i915_request *request)
 		if (!prev || i915_request_completed(prev))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;
 
+		if (intel_engine_queue_is_empty(engine))
+			attr.priority |= I915_PRIORITY_STALL;
+
 		engine->schedule(request, &attr);
 	}
 	rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index e9fb6c1d5e42..be132ceb83d9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,13 +19,14 @@  enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
-#define I915_PRIORITY_NEWCLIENT BIT(0)
+#define I915_PRIORITY_NEWCLIENT BIT(1)
+#define I915_PRIORITY_STALL BIT(0)
 
 struct i915_sched_attr {
 	/**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b9ea5cee249f..b26587d0c70d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1080,6 +1080,12 @@  gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
 
 void intel_engines_sanitize(struct drm_i915_private *i915);
 
+static inline bool
+intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
+{
+	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);