Message ID | 20180806083017.32215-6-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Series | [1/6] drm/i915: Limit C-states when waiting for the active request |
On 06/08/2018 09:30, Chris Wilson wrote:
> Latency is in the eye of the beholder. In the case where a client stops
> and waits for the gpu, give that request chain a small priority boost
> (not so that it overtakes higher priority clients, to preserve the
> external ordering) so that ideally the wait completes earlier.

Going back to my comments made against the new clients boost patch -
perhaps if you made it so that preemption is not triggered within the
internal priority delta, it might all be acceptable. Not sure how much of
the positive effect you observed would remain, though.

Regards,

Tvrtko

> Testcase: igt/gem_sync/switch-default
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
> [snip; full diff below]
Quoting Tvrtko Ursulin (2018-08-06 10:53:52)
> On 06/08/2018 09:30, Chris Wilson wrote:
> > Latency is in the eye of the beholder. In the case where a client stops
> > and waits for the gpu, give that request chain a small priority boost
> > (not so that it overtakes higher priority clients, to preserve the
> > external ordering) so that ideally the wait completes earlier.
>
> Going back to my comments made against the new clients boost patch -
> perhaps if you made it so that preemption is not triggered within the
> internal priority delta, it might all be acceptable. Not sure how much of
> the positive effect you observed would remain, though.

That entirely defeats the point of being able to preempt within the same
user priority; so no more "fairness" wrt FQ_CODEL and no more cheating
here.
-Chris
On 06/08/2018 11:03, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-06 10:53:52)
>> On 06/08/2018 09:30, Chris Wilson wrote:
>>> Latency is in the eye of the beholder. In the case where a client stops
>>> and waits for the gpu, give that request chain a small priority boost
>>> (not so that it overtakes higher priority clients, to preserve the
>>> external ordering) so that ideally the wait completes earlier.
>>
>> Going back to my comments made against the new clients boost patch -
>> perhaps if you made it so that preemption is not triggered within the
>> internal priority delta, it might all be acceptable. Not sure how much of
>> the positive effect you observed would remain, though.
>
> That entirely defeats the point of being able to preempt within the same
> user priority; so no more "fairness" wrt FQ_CODEL and no more cheating
> here.

In this case, postpone these tweaks until after we implement time-slicing? :)

Regards,

Tvrtko
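Tvrtko's suggestion could be read as something like the sketch below (purely illustrative, not part of this series, and need_preempt_user_only is a hypothetical helper): when deciding whether a queued request should preempt the active one, compare only the user-visible priority and ignore the internal boost bits, so a WAIT or NEWCLIENT bump alone never triggers preemption.

/*
 * Hypothetical helper, not from the patch: preempt only across user
 * priority levels. I915_PRIORITY_MASK clears the internal boost bits
 * (I915_PRIORITY_WAIT, I915_PRIORITY_NEWCLIENT) that live below
 * I915_USER_PRIORITY_SHIFT, leaving only the user-visible priority.
 */
static inline bool need_preempt_user_only(int queue_prio, int active_prio)
{
	return (queue_prio & I915_PRIORITY_MASK) >
	       (active_prio & I915_PRIORITY_MASK);
}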
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 460f256114f7..033d8057dd83 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3733,7 +3734,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e33cdd95bdc3..105a9e45277b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1289,6 +1289,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 549d56d0ba1c..2898ca74fa27 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -269,8 +269,9 @@ long i915_request_wait(struct i915_request *rq,
 			__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 749a192b4628..2da651db5029 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }
 
-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;
 
-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);
 
@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 			rq->sched.attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;
 
 		__list_del_entry(&stack.dfs_link);
 	}
@@ -350,7 +350,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
+}
 
-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 9b21ff72f619..e5120ef974d4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,12 +24,13 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_WAIT		BIT(1)
 #define I915_PRIORITY_NEWCLIENT	BIT(0)
 
 struct i915_sched_attr {
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
 
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
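For reference, my reading of the new priority encoding in i915_scheduler.h (an illustration, not code from the patch; EXAMPLE_EFFECTIVE_PRIO is a made-up macro): the signed user priority occupies the bits at and above I915_USER_PRIORITY_SHIFT, while the two internal boosts sit below it, so a boost can reorder requests within a user level but never across levels. That is also what GEM_BUG_ON(bump & I915_PRIORITY_MASK) in i915_schedule_bump_priority() asserts: a valid bump must not touch the user-priority bits.

/*
 * Illustrative layout of the effective priority after this patch:
 *
 *   upper bits: signed user priority << I915_USER_PRIORITY_SHIFT (now 2)
 *   bit 1:      I915_PRIORITY_WAIT      - a client is waiting on this chain
 *   bit 0:      I915_PRIORITY_NEWCLIENT - first request of a new client
 */
#define EXAMPLE_EFFECTIVE_PRIO(user, boosts) (I915_USER_PRIORITY(user) | (boosts))

/* e.g. a default-priority (0) request whose client has started waiting:     */
/* EXAMPLE_EFFECTIVE_PRIO(0, I915_PRIORITY_WAIT) == 2, still below user level */
/* 1, which is I915_USER_PRIORITY(1) == 4.                                    */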
Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 +++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 ++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_scheduler.h |  5 +++-
 5 files changed, 41 insertions(+), 10 deletions(-)
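As a usage illustration (my own sketch, not code from the series; wait_boosted is a hypothetical wrapper), a caller that wants its wait to apply the boost only needs to pass the new flag: i915_request_wait() then calls i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT) on the request chain before sleeping.

/* Hypothetical wrapper showing how the boost is picked up on the wait path. */
static long wait_boosted(struct i915_request *rq, long timeout)
{
	/* I915_WAIT_PRIORITY bumps the chain by I915_PRIORITY_WAIT first. */
	return i915_request_wait(rq,
				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_PRIORITY,
				 timeout);
}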