diff mbox series

[RFC,3/3] drm/i915: add a new perf configuration execbuf parameter

Message ID 20181008151822.10519-4-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: serialized performance queries | expand

Commit Message

Lionel Landwerlin Oct. 8, 2018, 3:18 p.m. UTC
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  4 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +++++++++++++++++++---
 drivers/gpu/drm/i915/i915_request.c        |  4 ++
 drivers/gpu/drm/i915/i915_request.h        |  2 +
 drivers/gpu/drm/i915/intel_lrc.c           | 13 ++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 11 +++-
 include/uapi/drm/i915_drm.h                | 12 ++++-
 7 files changed, 97 insertions(+), 9 deletions(-)

Comments

Chris Wilson Oct. 8, 2018, 3:44 p.m. UTC | #1
Quoting Lionel Landwerlin (2018-10-08 16:18:22)
> We want the ability to dispatch a set of command buffers to the
> hardware, each with a different OA configuration. To achieve this, we
> reuse a couple of fields from the execbuf2 struct (I CAN HAZ
> execbuf3?) to notify what OA configuration should be used for a batch
> buffer. This requires the process making the execbuf with this flag to
> also own the perf fd at the time of execbuf.

Sigh. It's a distinct step from emit_bb_start and should be using
emit_bb_start itself to execute the batch. Use i915_vma_move_to_active
to couple into retirement correctly, and use pin properly.

If you feel emit_bb is doing too much, split it up into primitives
rather than continue to overload it; emit_bb is used and will be used
outside of execbuf.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 193023427b40..564c2e749fd8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -444,6 +444,10 @@  static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_MMAP_GTT_COHERENT:
 		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = dev_priv->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 09187286d346..8b963641f142 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -286,6 +286,8 @@  struct i915_execbuffer {
 	 */
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
+
+	struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. */
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1121,6 +1123,32 @@  static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		*addr = value;
 }
 
+/**
+ * get_execbuf_oa_config - look up the OA configuration requested by an execbuf
+ * @dev_priv: i915 device
+ * @perf_fd: file descriptor supplied by userspace (taken from execbuf DR1)
+ * @oa_config_id: id of the OA configuration to use (taken from execbuf DR4)
+ * @out_oa_vma: passed through to i915_perf_get_oa_config() to receive the vma
+ *
+ * An execbuf may only select an OA configuration when @perf_fd refers to the
+ * exclusive OA stream currently registered on the device.
+ *
+ * Returns: -EINVAL if there is no exclusive stream, if @perf_fd is not a
+ * valid file descriptor, or if it does not refer to the exclusive stream;
+ * otherwise the return value of i915_perf_get_oa_config().
+ */
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+		      int perf_fd, u32 oa_config_id,
+		      struct i915_vma **out_oa_vma)
+{
+	struct file *perf_file;
+	bool owns_stream;
+
+	if (!dev_priv->perf.oa.exclusive_stream)
+		return -EINVAL;
+
+	perf_file = fget(perf_fd);
+	if (!perf_file)
+		return -EINVAL;
+
+	/*
+	 * Only the private_data pointer is compared, so the file reference
+	 * can be dropped before branching. This releases it on the mismatch
+	 * path as well, which previously returned without calling fput().
+	 */
+	owns_stream = perf_file->private_data ==
+		      dev_priv->perf.oa.exclusive_stream;
+	fput(perf_file);
+
+	if (!owns_stream)
+		return -EINVAL;
+
+	return i915_perf_get_oa_config(dev_priv, oa_config_id,
+				       NULL, out_oa_vma);
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -1173,6 +1201,9 @@  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
+	rq->oa_config = eb->oa_config;
+	eb->oa_config = NULL;
+
 	err = i915_request_await_object(rq, vma->obj, true);
 	if (err)
 		goto err_request;
@@ -1875,12 +1906,15 @@  static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 			return false;
 	}
 
-	if (exec->DR4 == 0xffffffff) {
-		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-		exec->DR4 = 0;
+	/* We reuse DR1 & DR4 fields for passing the perf config detail. */
+	if (!(exec->flags & I915_EXEC_PERF_CONFIG)) {
+		if (exec->DR4 == 0xffffffff) {
+			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+			exec->DR4 = 0;
+		}
+		if (exec->DR1 || exec->DR4)
+			return false;
 	}
-	if (exec->DR1 || exec->DR4)
-		return false;
 
 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
 		return false;
@@ -2224,6 +2258,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.buffer_count = args->buffer_count;
 	eb.batch_start_offset = args->batch_start_offset;
 	eb.batch_len = args->batch_len;
+	eb.oa_config = NULL;
 
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
@@ -2253,9 +2288,16 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (args->flags & I915_EXEC_PERF_CONFIG) {
+		err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4,
+					    &eb.oa_config);
+		if (err)
+			goto err_out_fence;
+	}
+
 	err = eb_create(&eb);
 	if (err)
-		goto err_out_fence;
+		goto err_perf;
 
 	GEM_BUG_ON(!eb.lut_size);
 
@@ -2365,6 +2407,9 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_batch_unpin;
 	}
 
+	eb.request->oa_config = eb.oa_config;
+	eb.oa_config = NULL;
+
 	if (in_fence) {
 		err = i915_request_await_dma_fence(eb.request, in_fence);
 		if (err < 0)
@@ -2426,6 +2471,9 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
+err_perf:
+	if (eb.oa_config)
+		i915_vma_put(eb.oa_config);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index abd4dacbab8e..8fb134793925 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -379,6 +379,9 @@  static void i915_request_retire(struct i915_request *request)
 
 	unreserve_gt(request->i915);
 
+	if (request->oa_config)
+		i915_vma_put(request->oa_config);
+
 	i915_sched_node_fini(request->i915, &request->sched);
 	i915_request_put(request);
 }
@@ -704,6 +707,7 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->oa_config = NULL;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 90e9d170a0cd..7a42c9b94877 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -188,6 +188,8 @@  struct i915_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
+
+	struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. */
 };
 
 #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b240332838c1..d3d8c0c60d65 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1858,6 +1858,8 @@  static int gen8_emit_bb_start(struct i915_request *rq,
 {
 	u32 *cs;
 	int ret;
+	bool use_oa_config =
+		rq->i915->perf.oa.exclusive_stream && rq->oa_config;
 
 	/* Don't rely in hw updating PDPs, specially in lite-restore.
 	 * Ideally, we should set Force PD Restore in ctx descriptor,
@@ -1875,10 +1877,19 @@  static int gen8_emit_bb_start(struct i915_request *rq,
 		rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
-	cs = intel_ring_begin(rq, 6);
+	cs = intel_ring_begin(rq, use_oa_config ? 10 : 6);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
+	if (use_oa_config) {
+		u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+		*cs++ = MI_BATCH_BUFFER_START_GEN8;
+		*cs++ = oa_config_offset;
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
+	}
+
 	/*
 	 * WaDisableCtxRestoreArbitration:bdw,chv
 	 *
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b8a7a014d46d..d8ebcf91ce93 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2037,11 +2037,20 @@  hsw_emit_bb_start(struct i915_request *rq,
 		  unsigned int dispatch_flags)
 {
 	u32 *cs;
+	bool use_oa_config =
+		rq->i915->perf.oa.exclusive_stream && rq->oa_config;
 
-	cs = intel_ring_begin(rq, 2);
+	cs = intel_ring_begin(rq, use_oa_config ? 4 : 2);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
+	if (use_oa_config) {
+		u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+		*cs++ = MI_BATCH_BUFFER_START;
+		*cs++ = oa_config_offset;
+	}
+
 	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
 		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
 	/* bit0-7 is the length on GEN6+ */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 62f669030741..4f0b39796d80 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -559,6 +559,8 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 53
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -1078,7 +1080,15 @@  struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/* Request that perf monitoring hardware be reprogrammed before executing the
+ * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf
+ * must respectively contain the file descriptor of the perf monitoring device
+ * and the configuration to program.
+ */
+#define I915_EXEC_PERF_CONFIG   (1<<20)
+
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \