Message ID | 20180806083017.32215-6-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Series | [1/6] drm/i915: Limit C-states when waiting for the active request |
On 06/08/2018 09:30, Chris Wilson wrote:
> Latency is in the eye of the beholder. In the case where a client stops
> and waits for the gpu, give that request chain a small priority boost
> (not so that it overtakes higher priority clients, to preserve the
> external ordering) so that ideally the wait completes earlier.

Going back to my comments made against the new clients boost patch -
perhaps if you made it so that preemption is not triggered within the
internal priority delta, it might all be acceptable. Not sure how much of
the positive effect you observed would remain, though.

Regards,

Tvrtko

> Testcase: igt/gem_sync/switch-default
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
> [snip; full diff below]
Quoting Tvrtko Ursulin (2018-08-06 10:53:52)
> On 06/08/2018 09:30, Chris Wilson wrote:
> > Latency is in the eye of the beholder. In the case where a client stops
> > and waits for the gpu, give that request chain a small priority boost
> > (not so that it overtakes higher priority clients, to preserve the
> > external ordering) so that ideally the wait completes earlier.
>
> Going back to my comments made against the new clients boost patch -
> perhaps if you made it so that preemption is not triggered within the
> internal priority delta, it might all be acceptable. Not sure how much of
> the positive effect you observed would remain, though.

That entirely defeats the point of being able to preempt within the same
user priority; so no more "fairness" wrt FQ_CODEL and no more cheating
here.
-Chris
On 06/08/2018 11:03, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-06 10:53:52)
>> On 06/08/2018 09:30, Chris Wilson wrote:
>>> Latency is in the eye of the beholder. In the case where a client stops
>>> and waits for the gpu, give that request chain a small priority boost
>>> (not so that it overtakes higher priority clients, to preserve the
>>> external ordering) so that ideally the wait completes earlier.
>>
>> Going back to my comments made against the new clients boost patch -
>> perhaps if you made it so that preemption is not triggered within the
>> internal priority delta, it might all be acceptable. Not sure how much of
>> the positive effect you observed would remain, though.
>
> That entirely defeats the point of being able to preempt within the same
> user priority; so no more "fairness" wrt FQ_CODEL and no more cheating
> here.

In this case, postpone these tweaks until after we implement time-slicing? :)

Regards,

Tvrtko
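Tvrtko's suggestion could be read as something like the sketch below (purely illustrative, not part of this series, and need_preempt_user_only is a hypothetical helper): when deciding whether a queued request should preempt the active one, compare only the user-visible priority and ignore the internal boost bits, so a WAIT or NEWCLIENT bump alone never triggers preemption.

/*
 * Hypothetical helper, not from the patch: preempt only across user
 * priority levels. I915_PRIORITY_MASK clears the internal boost bits
 * (I915_PRIORITY_WAIT, I915_PRIORITY_NEWCLIENT) that live below
 * I915_USER_PRIORITY_SHIFT, leaving only the user-visible priority.
 */
static inline bool need_preempt_user_only(int queue_prio, int active_prio)
{
	return (queue_prio & I915_PRIORITY_MASK) >
	       (active_prio & I915_PRIORITY_MASK);
}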
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 460f256114f7..033d8057dd83 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3733,7 +3734,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e33cdd95bdc3..105a9e45277b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1289,6 +1289,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 549d56d0ba1c..2898ca74fa27 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -269,8 +269,9 @@ long i915_request_wait(struct i915_request *rq,
 			__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 749a192b4628..2da651db5029 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 			rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -350,7 +350,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 9b21ff72f619..e5120ef974d4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,13 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_WAIT		BIT(1)
 #define I915_PRIORITY_NEWCLIENT	BIT(0)
 
 struct i915_sched_attr {
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
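For reference, my reading of the new priority encoding in i915_scheduler.h (an illustration, not code from the patch; EXAMPLE_EFFECTIVE_PRIO is a made-up macro): the signed user priority occupies the bits at and above I915_USER_PRIORITY_SHIFT, while the two internal boosts sit below it, so a boost can reorder requests within a user level but never across levels. That is also what GEM_BUG_ON(bump & I915_PRIORITY_MASK) in i915_schedule_bump_priority() asserts: a valid bump must not touch the user-priority bits.

/*
 * Illustrative layout of the effective priority after this patch:
 *
 *   upper bits: signed user priority << I915_USER_PRIORITY_SHIFT (now 2)
 *   bit 1:      I915_PRIORITY_WAIT      - a client is waiting on this chain
 *   bit 0:      I915_PRIORITY_NEWCLIENT - first request of a new client
 */
#define EXAMPLE_EFFECTIVE_PRIO(user, boosts) (I915_USER_PRIORITY(user) | (boosts))

/* e.g. a default-priority (0) request whose client has started waiting:     */
/* EXAMPLE_EFFECTIVE_PRIO(0, I915_PRIORITY_WAIT) == 2, still below user level */
/* 1, which is I915_USER_PRIORITY(1) == 4.                                    */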
Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
 5 files changed, 41 insertions(+), 10 deletions(-)
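As a usage illustration (my own sketch, not code from the series; wait_boosted is a hypothetical wrapper), a caller that wants its wait to apply the boost only needs to pass the new flag: i915_request_wait() then calls i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT) on the request chain before sleeping.

/* Hypothetical wrapper showing how the boost is picked up on the wait path. */
static long wait_boosted(struct i915_request *rq, long timeout)
{
	/* I915_WAIT_PRIORITY bumps the chain by I915_PRIORITY_WAIT first. */
	return i915_request_wait(rq,
				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_PRIORITY,
				 timeout);
}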