diff mbox series

[3/6] drm/i915/execlists: Force single submission for sentinels

Message ID 20200319091943.7815-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/6] drm/i915: Prefer '%ps' for printing function symbol names | expand

Commit Message

Chris Wilson March 19, 2020, 9:19 a.m. UTC
Currently, we only combine a sentinel request with a max-priority
barrier such that a sentinel request is always in ELSP[0] with nothing
following it. However, we will want to create similar ELSP[] submissions
providing a full-barrier in the submission queue, but without forcing
maximum priority. As such I915_FENCE_FLAG_SENTINEL takes on the
single-submission property and so we can remove the gvt special casing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.h       | 24 +++++++-------
 drivers/gpu/drm/i915/gt/intel_context_types.h |  4 +--
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 33 +++++--------------
 drivers/gpu/drm/i915/gvt/scheduler.c          |  7 ++--
 4 files changed, 26 insertions(+), 42 deletions(-)

Comments

Tvrtko Ursulin March 19, 2020, 2:31 p.m. UTC | #1
On 19/03/2020 09:19, Chris Wilson wrote:
> Currently, we only combine a sentinel request with a max-priority
> barrier such that a sentinel request is always in ELSP[0] with nothing
> following it. However, we will want to create similar ELSP[] submissions
> providing a full-barrier in the submission queue, but without forcing
> maximum priority. As such I915_FENCE_FLAG_SENTINEL takes on the
> single-submission property and so we can remove the gvt special casing.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.h       | 24 +++++++-------
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  4 +--
>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 33 +++++--------------
>   drivers/gpu/drm/i915/gvt/scheduler.c          |  7 ++--
>   4 files changed, 26 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 18efad255124..ee5d47165c12 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -198,18 +198,6 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>   	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>   }
>   
> -static inline bool
> -intel_context_force_single_submission(const struct intel_context *ce)
> -{
> -	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
> -}
> -
> -static inline void
> -intel_context_set_single_submission(struct intel_context *ce)
> -{
> -	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
> -}
> -
>   static inline bool
>   intel_context_nopreempt(const struct intel_context *ce)
>   {
> @@ -228,6 +216,18 @@ intel_context_clear_nopreempt(struct intel_context *ce)
>   	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
>   }
>   
> +static inline bool
> +intel_context_is_gvt(const struct intel_context *ce)
> +{
> +	return test_bit(CONTEXT_GVT, &ce->flags);
> +}
> +
> +static inline void
> +intel_context_set_gvt(struct intel_context *ce)
> +{
> +	set_bit(CONTEXT_GVT, &ce->flags);
> +}
> +
>   static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
>   {
>   	const u32 period =
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 0f3b68b95c56..fd2703efc10c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -64,8 +64,8 @@ struct intel_context {
>   #define CONTEXT_VALID_BIT		2
>   #define CONTEXT_USE_SEMAPHORES		3
>   #define CONTEXT_BANNED			4
> -#define CONTEXT_FORCE_SINGLE_SUBMISSION	5
> -#define CONTEXT_NOPREEMPT		6
> +#define CONTEXT_NOPREEMPT		5
> +#define CONTEXT_GVT			6
>   
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 112531b29f59..f0c4084c5b9a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1579,22 +1579,10 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>   		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>   }
>   
> -static bool ctx_single_port_submission(const struct intel_context *ce)
> -{
> -	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
> -		intel_context_force_single_submission(ce));
> -}
> -
>   static bool can_merge_ctx(const struct intel_context *prev,
>   			  const struct intel_context *next)
>   {
> -	if (prev != next)
> -		return false;
> -
> -	if (ctx_single_port_submission(prev))
> -		return false;
> -
> -	return true;
> +	return prev == next;
>   }
>   
>   static unsigned long i915_request_flags(const struct i915_request *rq)
> @@ -1844,6 +1832,12 @@ static inline void clear_ports(struct i915_request **ports, int count)
>   	memset_p((void **)ports, NULL, count);
>   }
>   
> +static bool has_sentinel(struct i915_request *prev, struct i915_request *next)
> +{
> +	return (i915_request_flags(prev) | i915_request_flags(next)) &
> +		BIT(I915_FENCE_FLAG_SENTINEL);
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -2125,18 +2119,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   				if (last->context == rq->context)
>   					goto done;
>   
> -				if (i915_request_has_sentinel(last))
> -					goto done;
> -
> -				/*
> -				 * If GVT overrides us we only ever submit
> -				 * port[0], leaving port[1] empty. Note that we
> -				 * also have to be careful that we don't queue
> -				 * the same context (even though a different
> -				 * request) to the second port.
> -				 */
> -				if (ctx_single_port_submission(last->context) ||
> -				    ctx_single_port_submission(rq->context))
> +				if (has_sentinel(last, rq))
>   					goto done;

I am only confused by can_merge_rq saying two sentinel requests can be
merged together:

	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
		      BIT(I915_FENCE_FLAG_SENTINEL))))
		return false;

What am I missing?

Regards,

Tvrtko

>   
>   				merge = false;
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 1c95bf8cbed0..4fccf4b194b0 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -204,9 +204,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
>   	return 0;
>   }
>   
> -static inline bool is_gvt_request(struct i915_request *rq)
> +static inline bool is_gvt_request(const struct i915_request *rq)
>   {
> -	return intel_context_force_single_submission(rq->context);
> +	return intel_context_is_gvt(rq->context);
>   }
>   
>   static void save_ring_hw_state(struct intel_vgpu *vgpu,
> @@ -401,6 +401,7 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
>   		return PTR_ERR(rq);
>   	}
>   
> +	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
>   	workload->req = i915_request_get(rq);
>   	return 0;
>   }
> @@ -1226,7 +1227,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
>   
>   		i915_vm_put(ce->vm);
>   		ce->vm = i915_vm_get(&ppgtt->vm);
> -		intel_context_set_single_submission(ce);
> +		intel_context_set_gvt(ce);
>   
>   		/* Max ring buffer size */
>   		if (!intel_uc_wants_guc_submission(&engine->gt->uc)) {
>
Chris Wilson March 19, 2020, 3:02 p.m. UTC | #2
Quoting Tvrtko Ursulin (2020-03-19 14:31:36)
> 
> On 19/03/2020 09:19, Chris Wilson wrote:
> > +                             if (has_sentinel(last, rq))
> >                                       goto done;
> 
> I am only confused by can_merge_rq saying two sentinel requests can be
> merged together:
> 
>         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
>                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
>                       BIT(I915_FENCE_FLAG_SENTINEL))))
>                 return false;
> 
> What am I missing?

I thought it was fine to coalesce consecutive sentinels within the
context into one.

Except you're thinking about gvt, and not my usage :|
-Chris
Tvrtko Ursulin March 19, 2020, 3:11 p.m. UTC | #3
On 19/03/2020 15:02, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-03-19 14:31:36)
>>
>> On 19/03/2020 09:19, Chris Wilson wrote:
>>> +                             if (has_sentinel(last, rq))
>>>                                        goto done;
>>
>> I am only confused by can_merge_rq saying two sentinel requests can be
>> merged together:
>>
>>          if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
>>                       (BIT(I915_FENCE_FLAG_NOPREEMPT) |
>>                        BIT(I915_FENCE_FLAG_SENTINEL))))
>>                  return false;
>>
>> What am I missing?
> 
> I thought it was fine to coalesce consecutive sentinels within the
> context into one.
> 
> Except you're thinking about gvt, and not my usage :|

Sentinel is like "only one context in elsp at a time", right? This is 
what GVT wants. And for the active barrier we want single elsp and not 
coalesced with non-sentinel from the same context. But sentinels are 
kernel context, so should be fine. Although it still may be clearer to 
make then not coalesce as well.

Regards,

Tvrtko
Chris Wilson March 19, 2020, 3:21 p.m. UTC | #4
Quoting Tvrtko Ursulin (2020-03-19 15:11:49)
> 
> On 19/03/2020 15:02, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-03-19 14:31:36)
> >>
> >> On 19/03/2020 09:19, Chris Wilson wrote:
> >>> +                             if (has_sentinel(last, rq))
> >>>                                        goto done;
> >>
> >> I am only confused by can_merge_rq saying two sentinel requests can be
> >> merged together:
> >>
> >>          if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
> >>                       (BIT(I915_FENCE_FLAG_NOPREEMPT) |
> >>                        BIT(I915_FENCE_FLAG_SENTINEL))))
> >>                  return false;
> >>
> >> What am I missing?
> > 
> > I thought it was fine to coalesce consecutive sentinels within the
> > context into one.
> > 
> > Except you're thinking about gvt, and not my usage :|
> 
> Sentinel is like "only one context in elsp at a time", right?
> This is what GVT wants.

GVT wants one request. For my purpose, it was just one context.

> And for the active barrier we want single elsp and not 
> coalesced with non-sentinel from the same context. But sentinels are 
> kernel context, so should be fine. Although it still may be clearer to 
> make then not coalesce as well.

The frequency should of non-barrier operations along the kernel context
should not be high enough that we gain anything by coalescing mixed
barrier/non-barrier request streams. I hope.

On the other hand we do want to coalesce NOPREEMPT streams. Oh well, my
hope for pulling it all in one bitops seems to be fading away.

First though, I have to answer the question of how I broke persistence.
-Chris
Chris Wilson March 19, 2020, 3:31 p.m. UTC | #5
Quoting Chris Wilson (2020-03-19 15:21:41)
> First though, I have to answer the question of how I broke persistence.

Fwiw, it's the await_active update. Time to double check whether it is
flushing the barrier on demand.
-Chris
Chris Wilson March 19, 2020, 4:58 p.m. UTC | #6
Quoting Chris Wilson (2020-03-19 15:31:41)
> Quoting Chris Wilson (2020-03-19 15:21:41)
> > First though, I have to answer the question of how I broke persistence.
> 
> Fwiw, it's the await_active update. Time to double check whether it is
> flushing the barrier on demand.

And it's because I didn't take the lingering preallocated barriers into
account.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 18efad255124..ee5d47165c12 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -198,18 +198,6 @@  static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
-static inline bool
-intel_context_force_single_submission(const struct intel_context *ce)
-{
-	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
-}
-
-static inline void
-intel_context_set_single_submission(struct intel_context *ce)
-{
-	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
-}
-
 static inline bool
 intel_context_nopreempt(const struct intel_context *ce)
 {
@@ -228,6 +216,18 @@  intel_context_clear_nopreempt(struct intel_context *ce)
 	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
+static inline bool
+intel_context_is_gvt(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_GVT, &ce->flags);
+}
+
+static inline void
+intel_context_set_gvt(struct intel_context *ce)
+{
+	set_bit(CONTEXT_GVT, &ce->flags);
+}
+
 static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
 	const u32 period =
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 0f3b68b95c56..fd2703efc10c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -64,8 +64,8 @@  struct intel_context {
 #define CONTEXT_VALID_BIT		2
 #define CONTEXT_USE_SEMAPHORES		3
 #define CONTEXT_BANNED			4
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	5
-#define CONTEXT_NOPREEMPT		6
+#define CONTEXT_NOPREEMPT		5
+#define CONTEXT_GVT			6
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 112531b29f59..f0c4084c5b9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1579,22 +1579,10 @@  static void execlists_submit_ports(struct intel_engine_cs *engine)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
 }
 
-static bool ctx_single_port_submission(const struct intel_context *ce)
-{
-	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		intel_context_force_single_submission(ce));
-}
-
 static bool can_merge_ctx(const struct intel_context *prev,
 			  const struct intel_context *next)
 {
-	if (prev != next)
-		return false;
-
-	if (ctx_single_port_submission(prev))
-		return false;
-
-	return true;
+	return prev == next;
 }
 
 static unsigned long i915_request_flags(const struct i915_request *rq)
@@ -1844,6 +1832,12 @@  static inline void clear_ports(struct i915_request **ports, int count)
 	memset_p((void **)ports, NULL, count);
 }
 
+static bool has_sentinel(struct i915_request *prev, struct i915_request *next)
+{
+	return (i915_request_flags(prev) | i915_request_flags(next)) &
+		BIT(I915_FENCE_FLAG_SENTINEL);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2125,18 +2119,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->context == rq->context)
 					goto done;
 
-				if (i915_request_has_sentinel(last))
-					goto done;
-
-				/*
-				 * If GVT overrides us we only ever submit
-				 * port[0], leaving port[1] empty. Note that we
-				 * also have to be careful that we don't queue
-				 * the same context (even though a different
-				 * request) to the second port.
-				 */
-				if (ctx_single_port_submission(last->context) ||
-				    ctx_single_port_submission(rq->context))
+				if (has_sentinel(last, rq))
 					goto done;
 
 				merge = false;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1c95bf8cbed0..4fccf4b194b0 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -204,9 +204,9 @@  static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
-static inline bool is_gvt_request(struct i915_request *rq)
+static inline bool is_gvt_request(const struct i915_request *rq)
 {
-	return intel_context_force_single_submission(rq->context);
+	return intel_context_is_gvt(rq->context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu,
@@ -401,6 +401,7 @@  intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 		return PTR_ERR(rq);
 	}
 
+	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 	workload->req = i915_request_get(rq);
 	return 0;
 }
@@ -1226,7 +1227,7 @@  int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 
 		i915_vm_put(ce->vm);
 		ce->vm = i915_vm_get(&ppgtt->vm);
-		intel_context_set_single_submission(ce);
+		intel_context_set_gvt(ce);
 
 		/* Max ring buffer size */
 		if (!intel_uc_wants_guc_submission(&engine->gt->uc)) {