[RFC,8/8] drm/i915: Fallback to hw context runtime when sw tracking is not available
diff mbox series

Message ID 20200110133049.2705-9-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • Per client engine busyness
Related show

Commit Message

Tvrtko Ursulin Jan. 10, 2020, 1:30 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

In GuC mode we do not receive the context switch interrupts needed to
accurately track context runtimes.

We can fall back to using the PPHWSP counter, which the GPU updates on
context save.

QQQ
Downsides are: 1) we do not see the currently executing batch and 2) with a
12MHz command streamer timestamp timer frequency the 32-bit counter wraps
every ~358 seconds. This also makes endless OpenCL batches with heartbeats
turned off a problem.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drm_client.c | 34 ++++++++++++++++++++------
 1 file changed, 27 insertions(+), 7 deletions(-)

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 55b2f86cc4c1..0b84ae528dcc 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -58,6 +58,24 @@  sw_busy_add(struct i915_gem_context *ctx, unsigned int engine_class)
 	return total;
 }
 
+static u64
+hw_busy_add(struct i915_gem_context *ctx, unsigned int engine_class)
+{
+	struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	u64 total = 0;
+
+	for_each_gem_engine(ce, engines, it) {
+		if (ce->engine->uabi_class != engine_class)
+			continue;
+
+		total += intel_context_get_hw_runtime_ns(ce);
+	}
+
+	return total;
+}
+
 static ssize_t
 show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
 {
@@ -68,12 +86,14 @@  show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
 	struct i915_gem_context *ctx;
 	u64 total = 0;
 
-	if (i915_attr->no_busy_stats)
-		return -ENODEV;
-
 	rcu_read_lock();
-	list_for_each_entry_rcu(ctx, list, client_link)
-		total += sw_busy_add(ctx, engine_class);
+	if (i915_attr->no_busy_stats) {
+		list_for_each_entry_rcu(ctx, list, client_link)
+			total += hw_busy_add(ctx, engine_class);
+	} else {
+		list_for_each_entry_rcu(ctx, list, client_link)
+			total += sw_busy_add(ctx, engine_class);
+	}
 	rcu_read_unlock();
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n", total);
@@ -164,7 +184,7 @@  __i915_drm_client_register(struct i915_drm_client *client,
 			if (ret) {
 				int j, k;
 
-				/* Unwind if not available. */
+				/* Unwind and fallback if not available. */
 				j = 0;
 				for_each_uabi_engine(engine, i915) {
 					if (j++ == i)
@@ -181,7 +201,7 @@  __i915_drm_client_register(struct i915_drm_client *client,
 				}
 
 				dev_notice_once(i915->drm.dev,
-						"Engine busy stats not available! (%d)",
+						"Reduced accuracy context runtime mode (%d)",
 						ret);
 				break;
 			}