[19/22] drm/i915/execlists: Refactor out can_merge_rq()

Message ID	20190204132214.9459-20-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Mon, 4 Feb 2019 13:22:11 +0000 Message-Id: <20190204132214.9459-20-chris@chris-wilson.co.uk> In-Reply-To: <20190204132214.9459-1-chris@chris-wilson.co.uk> References: <20190204132214.9459-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 19/22] drm/i915/execlists: Refactor out can_merge_rq() Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/22] drm/i915/execlists: Suppress mere WAIT preemption \| expand [01/22] drm/i915/execlists: Suppress mere WAIT preemption [02/22] drm/i915/execlists: Suppress redundant preemption [03/22] drm/i915/selftests: Exercise some AB...BA preemption chains [04/22] drm/i915: Trim NEWCLIENT boosting [05/22] drm/i915: Show support for accurate sw PMU busyness tracking [06/22] drm/i915: Revoke mmaps and prevent access to fence registers across reset [07/22] drm/i915: Force the GPU reset upon wedging [08/22] drm/i915: Uninterruptibly drain the timelines on unwedging [09/22] drm/i915: Wait for old resets before applying debugfs/i915_wedged [10/22] drm/i915: Serialise resets with wedging [11/22] drm/i915: Don't claim an unstarted request was guilty [12/22] drm/i915: Generalise GPU activity tracking [13/22] drm/i915: Release the active tracker tree upon idling [14/22] drm/i915: Allocate active tracking nodes from a slabcache [15/22] drm/i915: Make request allocation caches global [16/22] drm/i915: Add timeline barrier support [17/22] drm/i915: Pull i915_gem_active into the i915_active family [18/22] drm/i915: Keep timeline HWSP allocated until idle across the system [19/22] drm/i915/execlists: Refactor out can_merge_rq() [20/22] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ [21/22] drm/i915: Prioritise non-busywait semaphore workloads [22/22] semaphore-no-stats

Message ID

20190204132214.9459-20-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Mon,  4 Feb 2019 13:22:11 +0000
Message-Id: <20190204132214.9459-20-chris@chris-wilson.co.uk>
In-Reply-To: <20190204132214.9459-1-chris@chris-wilson.co.uk>
References: <20190204132214.9459-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 19/22] drm/i915/execlists: Refactor out
 can_merge_rq()
Precedence: list
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Series

[01/22] drm/i915/execlists: Suppress mere WAIT preemption | expand

Commit Message

Chris Wilson Feb. 4, 2019, 1:22 p.m. UTC

In the next patch, we add another user that wants to check whether
requests can be merged into a single HW execution, and in the future we
want to add more conditions under which requests from the same context
cannot be merged. In preparation, extract out can_merge_rq().

v2: Reorder the tests that decide whether we can continue filling ELSP,
and add bonus comments.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 35 ++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 11 deletions(-)

Comments

Tvrtko Ursulin Feb. 4, 2019, 7:02 p.m. UTC | #1

On 04/02/2019 13:22, Chris Wilson wrote:
> In the next patch, we add another user that wants to check whether
> requests can be merged into a single HW execution, and in the future we
> want to add more conditions under which requests from the same context
> cannot be merged. In preparation, extract out can_merge_rq().
> 
> v2: Reorder tests to decide if we can continue filling ELSP and bonus
> comments.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 35 ++++++++++++++++++++++----------
>   1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e37f207afb5a..66d465708bc6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -285,12 +285,11 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   }
>   
>   __maybe_unused static inline bool
> -assert_priority_queue(const struct intel_engine_execlists *execlists,
> -		      const struct i915_request *prev,
> +assert_priority_queue(const struct i915_request *prev,
>   		      const struct i915_request *next)
>   {
> -	if (!prev)
> -		return true;
> +	const struct intel_engine_execlists *execlists =
> +		&prev->engine->execlists;
>   
>   	/*
>   	 * Without preemption, the prev may refer to the still active element
> @@ -601,6 +600,17 @@ static bool can_merge_ctx(const struct intel_context *prev,
>   	return true;
>   }
>   
> +static bool can_merge_rq(const struct i915_request *prev,
> +			 const struct i915_request *next)
> +{
> +	GEM_BUG_ON(!assert_priority_queue(prev, next));
> +
> +	if (!can_merge_ctx(prev->hw_context, next->hw_context))
> +		return false;
> +
> +	return true;
> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(rq == port_request(port));
> @@ -753,8 +763,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		int i;
>   
>   		priolist_for_each_request_consume(rq, rn, p, i) {
> -			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
> -
>   			/*
>   			 * Can we combine this request with the current port?
>   			 * It has to be the same context/ringbuffer and not
> @@ -766,8 +774,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			 * second request, and so we never need to tell the
>   			 * hardware about the first.
>   			 */
> -			if (last &&
> -			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
> +			if (last && !can_merge_rq(last, rq)) {
>   				/*
>   				 * If we are on the second port and cannot
>   				 * combine this request with the last, then we
> @@ -776,6 +783,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				if (port == last_port)
>   					goto done;
>   
> +				/*
> +				 * We must not populate both ELSP[] with the
> +				 * same LRCA, i.e. we must submit 2 different
> +				 * contexts if we submit 2 ELSP.
> +				 */
> +				if (last->hw_context == rq->hw_context)
> +					goto done;
> +
>   				/*
>   				 * If GVT overrides us we only ever submit
>   				 * port[0], leaving port[1] empty. Note that we
> @@ -787,7 +802,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				    ctx_single_port_submission(rq->hw_context))
>   					goto done;
>   
> -				GEM_BUG_ON(last->hw_context == rq->hw_context);
>   
>   				if (submit)
>   					port_assign(port, last);
> @@ -826,8 +840,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * request triggering preemption on the next dequeue (or subsequent
>   	 * interrupt for secondary ports).
>   	 */
> -	execlists->queue_priority_hint =
> -		port != execlists->port ? rq_prio(last) : INT_MIN;
> +	execlists->queue_priority_hint = queue_prio(execlists);
>   
>   	if (submit) {
>   		port_assign(port, last);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e37f207afb5a..66d465708bc6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -285,12 +285,11 @@  static inline bool need_preempt(const struct intel_engine_cs *engine,
 }
 
 __maybe_unused static inline bool
-assert_priority_queue(const struct intel_engine_execlists *execlists,
-		      const struct i915_request *prev,
+assert_priority_queue(const struct i915_request *prev,
 		      const struct i915_request *next)
 {
-	if (!prev)
-		return true;
+	const struct intel_engine_execlists *execlists =
+		&prev->engine->execlists;
 
 	/*
 	 * Without preemption, the prev may refer to the still active element
@@ -601,6 +600,17 @@  static bool can_merge_ctx(const struct intel_context *prev,
 	return true;
 }
 
+static bool can_merge_rq(const struct i915_request *prev,
+			 const struct i915_request *next)
+{
+	GEM_BUG_ON(!assert_priority_queue(prev, next));
+
+	if (!can_merge_ctx(prev->hw_context, next->hw_context))
+		return false;
+
+	return true;
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(rq == port_request(port));
@@ -753,8 +763,6 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
-
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -766,8 +774,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last &&
-			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
+			if (last && !can_merge_rq(last, rq)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
@@ -776,6 +783,14 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (port == last_port)
 					goto done;
 
+				/*
+				 * We must not populate both ELSP[] with the
+				 * same LRCA, i.e. we must submit 2 different
+				 * contexts if we submit 2 ELSP.
+				 */
+				if (last->hw_context == rq->hw_context)
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
@@ -787,7 +802,6 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
 				if (submit)
 					port_assign(port, last);
@@ -826,8 +840,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * request triggering preemption on the next dequeue (or subsequent
 	 * interrupt for secondary ports).
 	 */
-	execlists->queue_priority_hint =
-		port != execlists->port ? rq_prio(last) : INT_MIN;
+	execlists->queue_priority_hint = queue_prio(execlists);
 
 	if (submit) {
 		port_assign(port, last);

[19/22] drm/i915/execlists: Refactor out can_merge_rq()

Commit Message

Comments

Patch