diff mbox

[RFC,10/10] drm/i915: Allow clients to query own per-engine busyness

Message ID 20180522123020.31624-11-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin May 22, 2018, 12:30 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Some customers want to know how much of the GPU time their clients are
using in order to make dynamic load-balancing decisions.

With the accounting infrastructure in place in the previous patch, we add
a new context param (I915_CONTEXT_GET_ENGINES_BUSY) which points to struct
drm_i915_context_engines_busy, followed by a variable number of structs
drm_i915_context_engine_busy.

Userspace needs to provide the number of attached structures in the
num_engines field, as well as set args->size to the byte size of the
provided buffer.

Attached drm_i915_context_engine_busy objects need to have the class and
instance of the engine which userspace wants to query busyness of
initialized.

Kernel will then report accumulated engine busyness as monotonically
increasing number of nano-seconds the engine spent executing jobs
belonging to this context.

v2:
 * Use intel_context_engine_get_busy_time.
 * Refactor to only use struct_mutex while initially enabling engine
   stats.

v3:
 * Fix stats enabling.

v4:
 * Change uAPI to enable querying multiple engines at a time.
   (Chris Wilson)

v5:
 * Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: gordon.kelly@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c | 100 ++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_context.h |   1 +
 include/uapi/drm/i915_drm.h             |  21 +++++
 3 files changed, 117 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2fcae26f63bb..7279128becbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -117,9 +117,10 @@  static void lut_close(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
+	struct drm_i915_private *i915 = ctx->i915;
 	unsigned int n;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	i915_ppgtt_put(ctx->ppgtt);
@@ -127,6 +128,9 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
 
+		if (i915->engine[n] && ce->stats.enabled)
+			intel_disable_engine_stats(i915->engine[n]);
+
 		if (ce->ops)
 			ce->ops->destroy(ce);
 	}
@@ -136,7 +140,7 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	list_del(&ctx->link);
 
-	ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
+	ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
 	kfree_rcu(ctx, rcu);
 }
 
@@ -733,11 +737,93 @@  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int
+get_engines_busy(struct drm_i915_private *i915,
+		 struct i915_gem_context *ctx,
+		 struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_context_engine_busy __user *busy_user;
+	struct drm_i915_context_engines_busy engines;
+	struct drm_i915_context_engine_busy busy;
+	bool mutex = false;
+	unsigned int i;
+	int ret = 0;
+
+	if (args->size < sizeof(engines))
+		return -EINVAL;
+
+	if (copy_from_user(&engines, u64_to_user_ptr(args->value),
+			   sizeof(engines)))
+		return -EFAULT;
+
+	if (engines.pad || engines.mbz)
+		return -EINVAL;
+
+	if (engines.num_engines == 0 || engines.num_engines > I915_NUM_ENGINES)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, args->value,
+		       sizeof(engines) + engines.num_engines * sizeof(busy)))
+		return -EFAULT;
+
+	busy_user = (struct drm_i915_context_engine_busy __user *)
+		    ((char __user *)args->value + sizeof(engines));
+
+	for (i = 0; i < engines.num_engines; i++, busy_user++) {
+		struct intel_engine_cs *engine;
+		struct intel_context *ce;
+
+		__copy_from_user(&busy, busy_user, sizeof(busy)); /* dst, src */
+
+		if (busy.mbz || busy.flags || busy.busy) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		engine = intel_engine_lookup_user(i915,
+						  busy.class, busy.instance);
+		if (!engine) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/* Enable stats on first query. */
+		ce = to_intel_context(ctx, engine);
+		if (!READ_ONCE(ce->stats.enabled)) {
+			/* Grab mutex if need to enable engine stats. */
+			if (!mutex) {
+				ret = i915_mutex_lock_interruptible(&i915->drm);
+				if (ret) /* 0 on success */
+					goto out;
+				mutex = true;
+			}
+
+			if (!ce->stats.enabled) {
+				ret = intel_enable_engine_stats(engine);
+				if (ret) /* 0 on success */
+					goto out;
+				ce->stats.enabled = true;
+			}
+		}
+
+		busy.busy = ktime_to_ns(intel_context_get_busy_time(ce));
+
+		__copy_to_user(busy_user, &busy, sizeof(busy));
+	}
+
+out:
+	if (mutex)
+		mutex_unlock(&i915->drm.struct_mutex);
+
+	return ret;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret = 0;
 
@@ -756,10 +842,10 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		if (ctx->ppgtt)
 			args->value = ctx->ppgtt->base.total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
+		else if (i915->mm.aliasing_ppgtt)
+			args->value = i915->mm.aliasing_ppgtt->base.total;
 		else
-			args->value = to_i915(dev)->ggtt.base.total;
+			args->value = i915->ggtt.base.total;
 		break;
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
 		args->value = i915_gem_context_no_error_capture(ctx);
@@ -770,6 +856,9 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_GET_ENGINES_BUSY:
+		ret = get_engines_busy(i915, ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -845,6 +934,7 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_GET_ENGINES_BUSY:
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e3d9948f7186..0567802beb1b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -163,6 +163,7 @@  struct i915_gem_context {
 
 		struct intel_context_stats {
 			seqlock_t lock;
+			bool enabled;
 			bool active;
 			ktime_t start;
 			ktime_t total;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2ecb6cef2af..bdd4b94f4c9b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1450,6 +1450,26 @@  struct drm_i915_gem_userptr {
 	__u32 handle;
 };
 
+struct drm_i915_context_engine_busy {
+	__u8 class; /* in/out: engine class to query (uabi engine class) */
+	__u8 instance; /* in/out: engine instance within the class */
+
+	__u16 mbz; /* must be zero */
+
+	__u32 flags; /* in/out/mbz: reserved for future use, must be zero */
+
+	__u64 busy; /* out/mbz: engine busyness in ns, zero on input */
+};
+
+struct drm_i915_context_engines_busy {
+	__u32 num_engines; /* in: number of engines[] entries that follow */
+	__u32 pad; /* mbz */
+
+	__u64 mbz; /* must be zero */
+
+	struct drm_i915_context_engine_busy engines[0]; /* variable length */
+};
+
 struct drm_i915_gem_context_param {
 	__u32 ctx_id;
 	__u32 size;
@@ -1463,6 +1483,7 @@  struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+#define I915_CONTEXT_GET_ENGINES_BUSY	0x7
 	__u64 value;
 };