
[v4,1/3] drm/i915: Get active pending request for given context

Message ID 1552552615-6703-2-git-send-email-ankit.p.navik@intel.com (mailing list archive)
State New, archived
Series drm/i915: Context aware user agnostic EU/Slice/Sub-slice control within kernel

Commit Message

Ankit Navik March 14, 2019, 8:36 a.m. UTC
From: Praveen Diwakar <praveen.diwakar@intel.com>

This patch provides the count of active pending requests which have not
yet been submitted to the GPU.

V2:
 * Change 64-bit to atomic for request count. (Tvrtko Ursulin)

V3:
 * Remove mutex for request count.
 * Rebase.
 * Fixes hitting underflow for predictive request. (Tvrtko Ursulin)

V4:
 * Rebase.

Cc: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
Cc: Yogesh Marathe <yogesh.marathe@intel.com>
Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 1 +
 drivers/gpu/drm/i915/i915_gem_context.h    | 5 +++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 3 +++
 4 files changed, 11 insertions(+)

Comments

Chris Wilson March 14, 2019, 8:55 a.m. UTC | #1
Quoting Ankit Navik (2019-03-14 08:36:53)
>  static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 02adcaf..a38963b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2475,6 +2475,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>          */
>         eb.request->batch = eb.batch;
>  
> +       atomic_inc(&eb.ctx->req_cnt);
> +
>         trace_i915_request_queue(eb.request, eb.batch_flags);
>         err = eb_submit(&eb);
>  err_request:
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 34a0866..d0af37d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -780,6 +780,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>                         last = rq;
>                         submit = true;
> +
> +                       if (atomic_read(&rq->gem_context->req_cnt) > 0)
> +                               atomic_dec(&rq->gem_context->req_cnt);

Not atomic. But is this not a clue that you've got the model wrong?

If only we were already tracking requests within contexts.
-Chris
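
For reference, the guard Chris is objecting to is a check-then-act pair: two
racing callers can both read req_cnt == 1, both decrement, and the counter
underflows anyway, so the atomic_read()/atomic_dec() combination does not
actually protect anything. A minimal sketch of a race-free variant, assuming
the same req_cnt field (the helper name below is illustrative, not part of
the series), would collapse the check and the decrement into a single atomic
primitive such as atomic_dec_if_positive() from include/linux/atomic.h:

/*
 * Illustrative sketch only, not part of this series.
 * atomic_dec_if_positive() decrements only when the result stays >= 0 and
 * returns the old value minus one, so a negative return means the counter
 * was already zero, i.e. the inc/dec placement is unbalanced.
 */
static void i915_gem_context_unqueue(struct i915_gem_context *ctx)
{
	if (atomic_dec_if_positive(&ctx->req_cnt) < 0)
		DRM_DEBUG_DRIVER("req_cnt underflow, unbalanced accounting\n");
}

Even so, this only papers over the symptom; Chris's larger point is that with
a correct placement of the increment and decrement the guard would not be
needed at all.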
Tvrtko Ursulin March 14, 2019, 10:39 a.m. UTC | #2
On 14/03/2019 08:36, Ankit Navik wrote:
> From: Praveen Diwakar <praveen.diwakar@intel.com>
> 
> This patch provides the count of active pending requests which have not
> yet been submitted to the GPU.
> 
> V2:
>   * Change 64-bit to atomic for request count. (Tvrtko Ursulin)
> 
> V3:
>   * Remove mutex for request count.
>   * Rebase.
>   * Fixes hitting underflow for predictive request. (Tvrtko Ursulin)
> 
> V4:
>   * Rebase.
> 
> Cc: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
> Cc: Yogesh Marathe <yogesh.marathe@intel.com>
> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c    | 1 +
>   drivers/gpu/drm/i915/i915_gem_context.h    | 5 +++++
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
>   drivers/gpu/drm/i915/intel_lrc.c           | 3 +++
>   4 files changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 280813a..a5876fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -453,6 +453,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   	}
>   
>   	trace_i915_context_create(ctx);
> +	atomic_set(&ctx->req_cnt, 0);
>   
>   	return ctx;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index ca150a7..c940168 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -227,6 +227,11 @@ struct i915_gem_context {
>   	 * context close.
>   	 */
>   	struct list_head handles_list;
> +
> +	/** req_cnt: tracks the pending commands, based on which we decide to
> +	 * go for low/medium/high load configuration of the GPU.
> +	 */
> +	atomic_t req_cnt;
>   };
>   
>   static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 02adcaf..a38963b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2475,6 +2475,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	 */
>   	eb.request->batch = eb.batch;
>   
> +	atomic_inc(&eb.ctx->req_cnt);
> +
>   	trace_i915_request_queue(eb.request, eb.batch_flags);
>   	err = eb_submit(&eb);
>   err_request:
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 34a0866..d0af37d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -780,6 +780,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   
>   			last = rq;
>   			submit = true;
> +
> +			if (atomic_read(&rq->gem_context->req_cnt) > 0)
> +				atomic_dec(&rq->gem_context->req_cnt);

Every review round I keep pointing out this is wrong and you keep 
persisting to have it like this. :( Would you in my place want to keep 
reviewing in these circumstances?

As Chris also pointed out, it is not atomic, so in theory it doesn't even 
protect against underflow, and, I repeat yet again, it is a hint that the 
placement is unbalanced.

I pointed you to some of my old patches which do correct accounting of 
per engine queued / runnable / running, to give an idea where to put the 
inc/dec.

Chris now also suggests maybe to tie it with timelines (AFAIU), so you 
could also see where requests are added and removed from the 
ce->ring->timeline list.

You say you tried something with my patches but "didnt see much
power benefit with that" - what exactly have you tried? And what is not 
much?

I am still curious what metric works for this. The one you implement is 
something like queued + _sometimes_ runnable. Sometimes because you may 
or may not be decrementing runnable depending on ELSP contention. And the 
number of ELSP slots may change with GuC and/or Gen11, so I worry it is 
way too undefined even ignoring the underflow issue.

Regards,

Tvrtko

>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
>
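
To make the point about balance concrete: the counter is only well defined if
every request increments it on one life-cycle transition and decrements it on
exactly one matching transition, rather than being decremented
opportunistically in execlists_dequeue(). A rough sketch of such a pairing
(names are illustrative, taken neither from Tvrtko's patches nor from the
i915 code):

/*
 * Illustrative sketch only: with a strict inc/dec pairing on request
 * life-cycle transitions the counter cannot underflow and its meaning
 * ("queued but not yet submitted") does not depend on how many ELSP
 * slots happen to be contended.
 */
static void ctx_request_queued(struct i915_gem_context *ctx)
{
	atomic_inc(&ctx->req_cnt);	/* request enters the queued state */
}

static void ctx_request_submitted(struct i915_gem_context *ctx)
{
	GEM_BUG_ON(!atomic_read(&ctx->req_cnt));
	atomic_dec(&ctx->req_cnt);	/* the same request leaves it, exactly once */
}

Where exactly those two calls belong (request addition and removal on the
ce->ring->timeline list, or per-engine queued/runnable/running accounting) is
precisely what the review above asks the author to work out.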

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 280813a..a5876fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -453,6 +453,7 @@  i915_gem_create_context(struct drm_i915_private *dev_priv,
 	}
 
 	trace_i915_context_create(ctx);
+	atomic_set(&ctx->req_cnt, 0);
 
 	return ctx;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ca150a7..c940168 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -227,6 +227,11 @@  struct i915_gem_context {
 	 * context close.
 	 */
 	struct list_head handles_list;
+
+	/** req_cnt: tracks the pending commands, based on which we decide to
+	 * go for low/medium/high load configuration of the GPU.
+	 */
+	atomic_t req_cnt;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 02adcaf..a38963b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2475,6 +2475,8 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	 */
 	eb.request->batch = eb.batch;
 
+	atomic_inc(&eb.ctx->req_cnt);
+
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
 err_request:
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 34a0866..d0af37d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -780,6 +780,9 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			last = rq;
 			submit = true;
+
+			if (atomic_read(&rq->gem_context->req_cnt) > 0)
+				atomic_dec(&rq->gem_context->req_cnt);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);