diff mbox series

[17/40] drm/i915: Priority boost for waiting clients

Message ID 20180919195544.1511-17-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/40] drm: Use default dma_fence hooks where possible for null syncobj | expand

Commit Message

Chris Wilson Sept. 19, 2018, 7:55 p.m. UTC
Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
 5 files changed, 41 insertions(+), 10 deletions(-)

Comments

Tvrtko Ursulin Sept. 24, 2018, 11:29 a.m. UTC | #1
On 19/09/2018 20:55, Chris Wilson wrote:
> Latency is in the eye of the beholder. In the case where a client stops
> and waits for the gpu, give that request chain a small priority boost
> (not so that it overtakes higher priority clients, to preserve the
> external ordering) so that ideally the wait completes earlier.
> 
> Testcase: igt/gem_sync/switch-default
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
>   drivers/gpu/drm/i915/i915_request.c   |  2 ++
>   drivers/gpu/drm/i915/i915_request.h   |  5 ++--
>   drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
>   drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
>   5 files changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6b347ffb996b..2fa75f2a1980 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	 */
>   	err = i915_gem_object_wait(obj,
>   				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_PRIORITY |
>   				   (write_domain ? I915_WAIT_ALL : 0),
>   				   MAX_SCHEDULE_TIMEOUT,
>   				   to_rps_client(file));
> @@ -3749,7 +3750,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	start = ktime_get();
>   
>   	ret = i915_gem_object_wait(obj,
> -				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
> +				   I915_WAIT_INTERRUPTIBLE |
> +				   I915_WAIT_PRIORITY |
> +				   I915_WAIT_ALL,
>   				   to_wait_timeout(args->timeout_ns),
>   				   to_rps_client(file));
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index d73ad490a261..abd4dacbab8e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
>   		add_wait_queue(errq, &reset);
>   
>   	intel_wait_init(&wait);
> +	if (flags & I915_WAIT_PRIORITY)
> +		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
>   
>   restart:
>   	do {
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 5f7361e0fca6..90e9d170a0cd 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
>   	__attribute__((nonnull(1)));
>   #define I915_WAIT_INTERRUPTIBLE	BIT(0)
>   #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
> -#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
> -#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
> +#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
> +#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
> +#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
>   
>   static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
>   					    u32 seqno);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 910ac7089596..1423088dceff 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   	return engine;
>   }
>   
> -void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> +static void __i915_schedule(struct i915_request *rq,
> +			    const struct i915_sched_attr *attr)
>   {
>   	struct list_head *uninitialized_var(pl);
>   	struct intel_engine_cs *engine, *last;
> @@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	const int prio = attr->priority;
>   	LIST_HEAD(dfs);
>   
> +	/* Needed in order to use the temporary link inside i915_dependency */
> +	lockdep_assert_held(&schedule_lock);
>   	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
>   
>   	if (i915_request_completed(rq))
> @@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	if (prio <= READ_ONCE(rq->sched.attr.priority))
>   		return;
>   
> -	/* Needed in order to use the temporary link inside i915_dependency */
> -	spin_lock(&schedule_lock);
> -
>   	stack.signaler = &rq->sched;
>   	list_add(&stack.dfs_link, &dfs);
>   
> @@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   		rq->sched.attr = *attr;
>   
>   		if (stack.dfs_link.next == stack.dfs_link.prev)
> -			goto out_unlock;
> +			return;
>   
>   		__list_del_entry(&stack.dfs_link);
>   	}
> @@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> +}
>   
> -out_unlock:
> +void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> +{
> +	spin_lock(&schedule_lock);
> +	__i915_schedule(rq, attr);
>   	spin_unlock(&schedule_lock);
>   }
> +
> +void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
> +{
> +	struct i915_sched_attr attr;
> +
> +	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
> +
> +	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
> +		return;
> +
> +	spin_lock_bh(&schedule_lock);
> +
> +	attr = rq->sched.attr;
> +	attr.priority |= bump;
> +	__i915_schedule(rq, &attr);
> +
> +	spin_unlock_bh(&schedule_lock);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 8058c17ae96a..cbfb64288c61 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -24,12 +24,13 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 2
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> +#define I915_PRIORITY_WAIT	((u8)BIT(1))
>   #define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))

Put a comment here explaining that the priority order is reversed in the 
internal range.

With new client protection against being repeatedly pre-empted added in 
a respective previous patch, I am okay that we give this a go.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

>   
>   struct i915_sched_attr {
> @@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
>   void i915_schedule(struct i915_request *request,
>   		   const struct i915_sched_attr *attr);
>   
> +void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
> +
>   struct list_head *
>   i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
>   
>
Chris Wilson Sept. 25, 2018, 9 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-09-24 12:29:45)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > +#define I915_PRIORITY_WAIT   ((u8)BIT(1))
> >   #define I915_PRIORITY_NEWCLIENT     ((u8)BIT(0))
> 
> Put a comment here explaining the priority order is reversed in the 
> internal range.
> 
> With new client protection against being repeatedly pre-empted added in 
> a respective previous patch, I am okay that we give this a go.

Hmm, actually it is not reversed. So you would prefer new clients to
have a small priority boost over the stalling clients (which are in
effect under control of the user). Ok.
-Chris
Tvrtko Ursulin Sept. 25, 2018, 9:07 a.m. UTC | #3
On 25/09/2018 10:00, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-09-24 12:29:45)
>>
>> On 19/09/2018 20:55, Chris Wilson wrote:
>>> +#define I915_PRIORITY_WAIT   ((u8)BIT(1))
>>>    #define I915_PRIORITY_NEWCLIENT     ((u8)BIT(0))
>>
>> Put a comment here explaining the priority order is reversed in the
>> internal range.
>>
>> With new client protection against being repeatedly pre-empted added in
>> a respective previous patch, I am okay that we give this a go.
> 
> Hmm, actually it is not reversed. So you would prefer new clients to
> have a small priority boost over the stalling clients (which are in
> effect under control of the user). Ok.

I thought it was reversed:

+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_MASK - (prio & I915_PRIORITY_MASK);
+	prio >>= I915_USER_PRIORITY_SHIFT;

But my two comments are not related - I just wanted a comment next to the 
internal level definitions in case they are reversed. If they are not 
reversed it is fine without a comment. I guess the comment only applies 
to the bucket organization, not the priority levels; that was my confusion.

It made sense to me that new clients would be more important than 
something stuck on a potentially long wait, but I don't know - it's all 
guesswork.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6b347ffb996b..2fa75f2a1980 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3749,7 +3750,9 @@  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@  long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7361e0fca6..90e9d170a0cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -277,8 +277,9 @@  long i915_request_wait(struct i915_request *rq,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 					    u32 seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 910ac7089596..1423088dceff 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@  sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 		rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -371,7 +371,29 @@  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 8058c17ae96a..cbfb64288c61 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,13 @@  enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_WAIT	((u8)BIT(1))
 #define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
 
 struct i915_sched_attr {
@@ -99,6 +100,8 @@  void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);