Message ID | 20180629075348.27358-13-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
On 29/06/2018 08:53, Chris Wilson wrote:
> In order to maximise concurrency between engines, if we queue a request
> to a currently idle ring, reorder its dependencies to execute that request
> as early as possible and ideally improve occupancy of multiple engines
> simultaneously.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>  drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>  3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2d7a785dd88c..d618e7127e88 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>  	local_bh_disable();
>  	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>  	if (engine->schedule) {
> -		struct i915_sched_attr attr = request->gem_context->sched;
> +		struct i915_gem_context *ctx = request->gem_context;
> +		struct i915_sched_attr attr = ctx->sched;
>
>  		/*
>  		 * Boost priorities to new clients (new request flows).
> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>  		if (!prev || i915_request_completed(prev))
>  			attr.priority |= I915_PRIORITY_NEWCLIENT;
>
> +		if (intel_engine_queue_is_empty(engine))
> +			attr.priority |= I915_PRIORITY_STALL;
> +
>  		engine->schedule(request, &attr);
>  	}
>  	rcu_read_unlock();
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index e9fb6c1d5e42..be132ceb83d9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,13 +19,14 @@ enum {
>  	I915_PRIORITY_INVALID = INT_MIN
>  };
>
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 2
>  #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>
>  #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>  #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>
> -#define I915_PRIORITY_NEWCLIENT BIT(0)
> +#define I915_PRIORITY_NEWCLIENT BIT(1)
> +#define I915_PRIORITY_STALL BIT(0)

So lower priority than new clients.

Again I have big concerns.

A client which happens to be the only one using some exotic engine would
now be able to trash everything else running on the system, just because
it so happens that no one else uses its favourite engine. And that's
regardless of how much work it has queued up on other, potentially busy,
engines.

With this and the previous patch I'd say - hello, slippery slope of
scheduler tuning! :))

Regards,

Tvrtko

>
>  struct i915_sched_attr {
>  	/**
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index b9ea5cee249f..b26587d0c70d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -1080,6 +1080,12 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
>
>  void intel_engines_sanitize(struct drm_i915_private *i915);
>
> +static inline bool
> +intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
> +{
> +	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
> +}
> +
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>
Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
>
> On 29/06/2018 08:53, Chris Wilson wrote:
> > In order to maximise concurrency between engines, if we queue a request
> > to a currently idle ring, reorder its dependencies to execute that request
> > as early as possible and ideally improve occupancy of multiple engines
> > simultaneously.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
> >  drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
> >  drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
> >  3 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 2d7a785dd88c..d618e7127e88 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
> >  	local_bh_disable();
> >  	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> >  	if (engine->schedule) {
> > -		struct i915_sched_attr attr = request->gem_context->sched;
> > +		struct i915_gem_context *ctx = request->gem_context;
> > +		struct i915_sched_attr attr = ctx->sched;
> >
> >  		/*
> >  		 * Boost priorities to new clients (new request flows).
> > @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
> >  		if (!prev || i915_request_completed(prev))
> >  			attr.priority |= I915_PRIORITY_NEWCLIENT;
> >
> > +		if (intel_engine_queue_is_empty(engine))
> > +			attr.priority |= I915_PRIORITY_STALL;
> > +
> >  		engine->schedule(request, &attr);
> >  	}
> >  	rcu_read_unlock();
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > index e9fb6c1d5e42..be132ceb83d9 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > @@ -19,13 +19,14 @@ enum {
> >  	I915_PRIORITY_INVALID = INT_MIN
> >  };
> >
> > -#define I915_USER_PRIORITY_SHIFT 1
> > +#define I915_USER_PRIORITY_SHIFT 2
> >  #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >
> >  #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >  #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >
> > -#define I915_PRIORITY_NEWCLIENT BIT(0)
> > +#define I915_PRIORITY_NEWCLIENT BIT(1)
> > +#define I915_PRIORITY_STALL BIT(0)
>
> So lower priority than new clients.
>
> Again I have big concerns.
>
> A client which happens to be the only one using some exotic engine would
> now be able to trash everything else running on the system, just because
> it so happens that no one else uses its favourite engine. And that's
> regardless of how much work it has queued up on other, potentially busy,
> engines.

Within the same priority level, below others, but just above steady
state. Because of that, the starvation issue here is no worse than it is
currently.
-Chris
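For anyone following the bit arithmetic in this exchange, below is a minimal
standalone sketch of the ordering Chris describes, reusing the macro
definitions from the patch; the wrapper program and its printed values are
illustrative only, not driver code:

```c
/* Sketch of the internal priority ordering after this patch: within a
 * single user priority level, NEWCLIENT outranks STALL, which outranks
 * steady state, while the next user level outranks all internal boosts. */
#include <stdio.h>

#define BIT(n)                   (1 << (n))
#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x)    ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_NEWCLIENT  BIT(1)
#define I915_PRIORITY_STALL      BIT(0)

int main(void)
{
	int steady    = I915_USER_PRIORITY(0);                           /* 0 */
	int stalled   = I915_USER_PRIORITY(0) | I915_PRIORITY_STALL;     /* 1 */
	int newclient = I915_USER_PRIORITY(0) | I915_PRIORITY_NEWCLIENT; /* 2 */
	int next_user = I915_USER_PRIORITY(1);                           /* 4 */

	printf("steady=%d stalled=%d newclient=%d next_user=%d\n",
	       steady, stalled, newclient, next_user);
	return 0;
}
```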
On 29/06/2018 11:15, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
>>
>> On 29/06/2018 08:53, Chris Wilson wrote:
>>> In order to maximise concurrency between engines, if we queue a request
>>> to a currently idle ring, reorder its dependencies to execute that request
>>> as early as possible and ideally improve occupancy of multiple engines
>>> simultaneously.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>  drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>>>  drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>>>  drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>>>  3 files changed, 14 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 2d7a785dd88c..d618e7127e88 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>>>  	local_bh_disable();
>>>  	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>>>  	if (engine->schedule) {
>>> -		struct i915_sched_attr attr = request->gem_context->sched;
>>> +		struct i915_gem_context *ctx = request->gem_context;
>>> +		struct i915_sched_attr attr = ctx->sched;
>>>
>>>  		/*
>>>  		 * Boost priorities to new clients (new request flows).
>>> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>>>  		if (!prev || i915_request_completed(prev))
>>>  			attr.priority |= I915_PRIORITY_NEWCLIENT;
>>>
>>> +		if (intel_engine_queue_is_empty(engine))
>>> +			attr.priority |= I915_PRIORITY_STALL;
>>> +
>>>  		engine->schedule(request, &attr);
>>>  	}
>>>  	rcu_read_unlock();
>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>> index e9fb6c1d5e42..be132ceb83d9 100644
>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>> @@ -19,13 +19,14 @@ enum {
>>>  	I915_PRIORITY_INVALID = INT_MIN
>>>  };
>>>
>>> -#define I915_USER_PRIORITY_SHIFT 1
>>> +#define I915_USER_PRIORITY_SHIFT 2
>>>  #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>
>>>  #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>  #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>
>>> -#define I915_PRIORITY_NEWCLIENT BIT(0)
>>> +#define I915_PRIORITY_NEWCLIENT BIT(1)
>>> +#define I915_PRIORITY_STALL BIT(0)
>>
>> So lower priority than new clients.
>>
>> Again I have big concerns.
>>
>> A client which happens to be the only one using some exotic engine would
>> now be able to trash everything else running on the system, just because
>> it so happens that no one else uses its favourite engine. And that's
>> regardless of how much work it has queued up on other, potentially busy,
>> engines.
>
> Within the same priority level, below others, but just above steady
> state. Because of that, the starvation issue here is no worse than it is
> currently.

I don't follow.

Currently, a client which is the only one submitting to an engine won't
cause preemption on other engines. With this patch it would, so it opens
up a new set of unfairness issues.

Or please correct me if I have interpreted the commit message and code
incorrectly.

Regards,

Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2d7a785dd88c..d618e7127e88 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_gem_context *ctx = request->gem_context;
+		struct i915_sched_attr attr = ctx->sched;

 		/*
 		 * Boost priorities to new clients (new request flows).
@@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
 		if (!prev || i915_request_completed(prev))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;

+		if (intel_engine_queue_is_empty(engine))
+			attr.priority |= I915_PRIORITY_STALL;
+
 		engine->schedule(request, &attr);
 	}
 	rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index e9fb6c1d5e42..be132ceb83d9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,13 +19,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };

-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)

 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)

-#define I915_PRIORITY_NEWCLIENT BIT(0)
+#define I915_PRIORITY_NEWCLIENT BIT(1)
+#define I915_PRIORITY_STALL BIT(0)

 struct i915_sched_attr {
 	/**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b9ea5cee249f..b26587d0c70d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1080,6 +1080,12 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)

 void intel_engines_sanitize(struct drm_i915_private *i915);

+static inline bool
+intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
+{
+	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
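A side note on the mask arithmetic in the i915_scheduler.h hunk: with
I915_USER_PRIORITY_SHIFT bumped to 2, I915_PRIORITY_COUNT becomes 4 and
I915_PRIORITY_MASK becomes -4, so the mask recovers the user priority level
while its complement isolates the two internal boost bits. A small
self-checking sketch (illustrative only, not driver code; BIT() is
simplified to a plain shift):

```c
/* Verifies the mask arithmetic from the patch: -COUNT in two's
 * complement clears exactly the internal boost bits. */
#include <assert.h>

#define BIT(n)                   (1 << (n))
#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x)    ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_COUNT      BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK       (-I915_PRIORITY_COUNT)
#define I915_PRIORITY_NEWCLIENT  BIT(1)
#define I915_PRIORITY_STALL      BIT(0)

int main(void)
{
	int prio = I915_USER_PRIORITY(3) |
		   I915_PRIORITY_NEWCLIENT | I915_PRIORITY_STALL;

	/* -4 == ~3, so the mask strips bits 0-1 and keeps the user level. */
	assert((prio & I915_PRIORITY_MASK) == I915_USER_PRIORITY(3));
	assert((prio & ~I915_PRIORITY_MASK) ==
	       (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_STALL));
	return 0;
}
```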
In order to maximise concurrency between engines, if we queue a request
to a currently idle ring, reorder its dependencies to execute that request
as early as possible and ideally improve occupancy of multiple engines
simultaneously.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
 drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)