[4/6] drm/i915: Track per-context engine busyness

Message ID	20200207161331.23447-5-tvrtko.ursulin@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=K8vl=33=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 143B121775 From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> To: Intel-gfx@lists.freedesktop.org Date: Fri, 7 Feb 2020 16:13:29 +0000 Message-Id: <20200207161331.23447-5-tvrtko.ursulin@linux.intel.com> In-Reply-To: <20200207161331.23447-1-tvrtko.ursulin@linux.intel.com> References: <20200207161331.23447-1-tvrtko.ursulin@linux.intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 4/6] drm/i915: Track per-context engine busyness Precedence: list Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	Per client engine busyness \| expand [RFC,0/8] Per client engine busyness [1/6] drm/i915: Expose list of clients in sysfs [2/6] drm/i915: Update client name on context create [3/6] drm/i915: Make GEM contexts track DRM clients [4/6] drm/i915: Track per-context engine busyness [5/6] drm/i915: Track per drm client engine class busyness [6/6] drm/i915: Expose per-engine client busyness

Message ID

20200207161331.23447-5-tvrtko.ursulin@linux.intel.com (mailing list archive)

State

New, archived

Headers

DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 143B121775
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Intel-gfx@lists.freedesktop.org
Date: Fri,  7 Feb 2020 16:13:29 +0000
Message-Id: <20200207161331.23447-5-tvrtko.ursulin@linux.intel.com>
In-Reply-To: <20200207161331.23447-1-tvrtko.ursulin@linux.intel.com>
References: <20200207161331.23447-1-tvrtko.ursulin@linux.intel.com>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 4/6] drm/i915: Track per-context engine busyness
Precedence: list
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Series

Per client engine busyness | expand

Commit Message

Tvrtko Ursulin Feb. 7, 2020, 4:13 p.m. UTC

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Some customers want to know how much of the GPU time are their clients
using in order to make dynamic load balancing decisions.

With the hooks already in place which track the overall engine busyness,
we can extend that slightly to split that time between contexts.

v2: Fix accounting for tail updates.
v3: Rebase.
v4: Mark currently running contexts as active on stats enable.
v5: Include some headers to fix the build.
v6: Added fine grained lock.
v7: Convert to seqlock. (Chris Wilson)
v8: Rebase and tidy with helpers.
v9: Refactor.
v10: Move recording start to promotion. (Chris)
v11: Consolidate duplicated code. (Chris)
v12: execlists->active cannot be NULL. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 20 ++++++++
 drivers/gpu/drm/i915/gt/intel_context.h       | 13 ++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  9 ++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 15 +++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 46 ++++++++++++++++---
 5 files changed, 95 insertions(+), 8 deletions(-)

Comments

Chris Wilson Feb. 9, 2020, 11:02 a.m. UTC | #1

Quoting Tvrtko Ursulin (2020-02-07 16:13:29)
>                 } else {
> -                       GEM_BUG_ON(!*execlists->active);
> +                       rq = *execlists->active++;
> +                       GEM_BUG_ON(!rq);
> +
> +                       GEM_BUG_ON(execlists->active - execlists->inflight >
> +                                  execlists_num_ports(execlists));
>  
>                         /* port0 completed, advanced to port1 */
>                         trace_ports(execlists, "completed", execlists->active);
> @@ -2327,13 +2359,15 @@ static void process_csb(struct intel_engine_cs *engine)
>                          * coherent (visible from the CPU) before the
>                          * user interrupt and CSB is processed.
>                          */
> -                       GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
> +                       GEM_BUG_ON(!i915_request_completed(rq) &&
>                                    !reset_in_progress(execlists));
> -                       execlists_schedule_out(*execlists->active++);
>  
> -                       GEM_BUG_ON(execlists->active - execlists->inflight >
> -                                  execlists_num_ports(execlists));
> +                       execlists_schedule_out(rq);

We don't need to touch this branch...

>                 }
> +
> +               rq = *execlists->active;
> +               if (rq)
> +                       intel_context_stats_start(rq->context);

As this can be done after the loop. I think combining it with the
set_timeslice would be nice (both are 'time' related).

>         } while (head != tail);
>  
>         execlists->csb_head = head;

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 57e8a051ddc2..c8669036c303 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -286,6 +286,7 @@  intel_context_init(struct intel_context *ce,
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
+	seqlock_init(&ce->stats.lock);
 
 	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
@@ -380,6 +381,25 @@  struct i915_request *intel_context_create_request(struct intel_context *ce)
 	return rq;
 }
 
+ktime_t intel_context_get_busy_time(struct intel_context *ce)
+{
+	unsigned int seq;
+	ktime_t total;
+
+	do {
+		seq = read_seqbegin(&ce->stats.lock);
+
+		total = ce->stats.total;
+
+		if (ce->stats.active)
+			total = ktime_add(total,
+					  ktime_sub(ktime_get(),
+						    ce->stats.start));
+	} while (read_seqretry(&ce->stats.lock, seq));
+
+	return total;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 604d5cfc46ba..871196aa7d5a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -227,4 +227,17 @@  intel_context_clear_nopreempt(struct intel_context *ce)
 	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
+static inline void
+__intel_context_stats_start(struct intel_context *ce, ktime_t now)
+{
+	struct intel_context_stats *stats = &ce->stats;
+
+	if (!stats->active) {
+		stats->start = now;
+		stats->active = true;
+	}
+}
+
+ktime_t intel_context_get_busy_time(struct intel_context *ce);
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca1420fb8b53..963d33dc5289 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -11,6 +11,7 @@ 
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <linux/seqlock.h>
 
 #include "i915_active_types.h"
 #include "i915_utils.h"
@@ -83,6 +84,14 @@  struct intel_context {
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
+
+	/** stats: Context GPU engine busyness tracking. */
+	struct intel_context_stats {
+		seqlock_t lock;
+		bool active;
+		ktime_t start;
+		ktime_t total;
+	} stats;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b1c7b1ed6149..a5344d2c7c10 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1599,8 +1599,19 @@  int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		engine->stats.enabled_at = ktime_get();
 
-		/* XXX submission method oblivious? */
-		for (port = execlists->active; (rq = *port); port++)
+		/*
+		 * Mark currently running context as active.
+		 * XXX submission method oblivious?
+		 */
+
+		rq = NULL;
+		port = execlists->active;
+		rq = *port;
+		if (rq)
+			__intel_context_stats_start(rq->context,
+						    engine->stats.enabled_at);
+
+		for (; (rq = *port); port++)
 			engine->stats.active++;
 
 		for (port = execlists->pending; (rq = *port); port++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b350e01d86d2..1cd38b6eb189 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1197,6 +1197,32 @@  static void reset_active(struct i915_request *rq,
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
+static void intel_context_stats_start(struct intel_context *ce)
+{
+	struct intel_context_stats *stats = &ce->stats;
+	unsigned long flags;
+
+	write_seqlock_irqsave(&stats->lock, flags);
+	__intel_context_stats_start(ce, ktime_get());
+	write_sequnlock_irqrestore(&stats->lock, flags);
+}
+
+static void intel_context_stats_stop(struct intel_context *ce)
+{
+	struct intel_context_stats *stats = &ce->stats;
+	unsigned long flags;
+
+	if (!READ_ONCE(stats->active))
+		return;
+
+	write_seqlock_irqsave(&stats->lock, flags);
+	GEM_BUG_ON(!READ_ONCE(stats->active));
+	stats->total = ktime_add(stats->total,
+				 ktime_sub(ktime_get(), stats->start));
+	stats->active = false;
+	write_sequnlock_irqrestore(&stats->lock, flags);
+}
+
 static inline struct intel_engine_cs *
 __execlists_schedule_in(struct i915_request *rq)
 {
@@ -1264,7 +1290,7 @@  static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
 {
-	struct intel_context * const ce = rq->context;
+	struct intel_context *ce = rq->context;
 
 	/*
 	 * NB process_csb() is not under the engine->active.lock and hence
@@ -1281,6 +1307,7 @@  __execlists_schedule_out(struct i915_request *rq,
 		intel_engine_add_retire(engine, ce->timeline);
 
 	intel_engine_context_out(engine);
+	intel_context_stats_stop(ce);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
 
@@ -2262,6 +2289,7 @@  static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
+		struct i915_request *rq;
 		bool promote;
 
 		if (++head == num_entries)
@@ -2316,7 +2344,11 @@  static void process_csb(struct intel_engine_cs *engine)
 
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
-			GEM_BUG_ON(!*execlists->active);
+			rq = *execlists->active++;
+			GEM_BUG_ON(!rq);
+
+			GEM_BUG_ON(execlists->active - execlists->inflight >
+				   execlists_num_ports(execlists));
 
 			/* port0 completed, advanced to port1 */
 			trace_ports(execlists, "completed", execlists->active);
@@ -2327,13 +2359,15 @@  static void process_csb(struct intel_engine_cs *engine)
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
 			 */
-			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
+			GEM_BUG_ON(!i915_request_completed(rq) &&
 				   !reset_in_progress(execlists));
-			execlists_schedule_out(*execlists->active++);
 
-			GEM_BUG_ON(execlists->active - execlists->inflight >
-				   execlists_num_ports(execlists));
+			execlists_schedule_out(rq);
 		}
+
+		rq = *execlists->active;
+		if (rq)
+			intel_context_stats_start(rq->context);
 	} while (head != tail);
 
 	execlists->csb_head = head;

[4/6] drm/i915: Track per-context engine busyness

Commit Message

Comments

Patch