diff mbox

[RFC,8/8] drm/i915: Add support to add execbuffer tags to OA counter reports

Message ID 1438753977-20335-9-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com Aug. 5, 2015, 5:52 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

This patch enables userspace to specify tags (per workload), provided via
execbuffer ioctl, which could be added to OA reports, to help associate
reports with the corresponding workloads.

There may be multiple stages within a single context, from a userspace
perspective. An ability is needed to individually associate the OA reports
with their corresponding workloads (execbuffers), which may not be possible
solely with ctx_id or pid information. This patch enables such a mechanism.

In this patch, the upper 32 bits of the rsvd1 field of the execbuffer
arguments are used for passing the tag, complementing the ctx_id held in the
lower 32 bits. A new sample_tag bit in the OA event attributes requests that
the tag be included in the forwarded OA reports.

v2: Changes (as suggested by Chris):
    - Using upper 32 bits of rsvd1 field for passing tag (as it is a
      natural complement to ctx_id which is there in lower 32 bits of rsvd1).
    - Removed the flags field to inform kernel of tag being passed.
    - Better ABI definition.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  8 +++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 ++++--
 drivers/gpu/drm/i915/i915_oa_perf.c        | 23 ++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                | 13 ++++++++++++-
 4 files changed, 41 insertions(+), 9 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6e6f1be..d5d9156 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1673,6 +1673,7 @@  struct i915_oa_rcs_node {
 	bool discard;
 	u32 ctx_id;
 	u32 pid;
+	u32 tag;
 };
 
 extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -1983,11 +1984,12 @@  struct drm_i915_private {
 		struct work_struct forward_work;
 		struct work_struct event_destroy_work;
 #define I915_OA_SAMPLE_PID		(1<<0)
+#define I915_OA_SAMPLE_TAG		(1<<1)
 		int sample_info_flags;
 	} oa_pmu;
 
 	void (*emit_profiling_data[I915_PROFILE_MAX])
-		(struct drm_i915_gem_request *req, u32 global_ctx_id);
+		(struct drm_i915_gem_request *req, u32 global_ctx_id, u32 tag);
 #endif
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3169,7 +3171,7 @@  void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 				  struct intel_context *context);
 void i915_emit_profiling_data(struct drm_i915_gem_request *req,
-				u32 global_ctx_id);
+				u32 global_ctx_id, u32 tag);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3178,7 +3180,7 @@  static inline void
 i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 			     struct intel_context *context) {}
 void i915_emit_profiling_data(struct drm_i915_gem_request *req,
-				u32 global_ctx_id) {};
+				u32 global_ctx_id, u32 tag) {};
 #endif
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e58b10d..7f2246f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,7 +1317,8 @@  i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 			goto error;
 	}
 
-	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id,
+				i915_execbuffer2_get_tag(*args));
 
 	exec_len = args->batch_len;
 	if (cliprects) {
@@ -1341,7 +1342,8 @@  i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 			return ret;
 	}
 
-	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id,
+				i915_execbuffer2_get_tag(*args));
 
 	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
 
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index f4b8a2b..48591fc 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -26,7 +26,7 @@  static int hsw_perf_format_sizes[] = {
 };
 
 void i915_emit_profiling_data(struct drm_i915_gem_request *req,
-				u32 global_ctx_id)
+				u32 global_ctx_id, u32 tag)
 {
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -34,7 +34,8 @@  void i915_emit_profiling_data(struct drm_i915_gem_request *req,
 
 	for (i = I915_PROFILE_OA; i < I915_PROFILE_MAX; i++) {
 		if (dev_priv->emit_profiling_data[i])
-			dev_priv->emit_profiling_data[i](req, global_ctx_id);
+			dev_priv->emit_profiling_data[i](req, global_ctx_id,
+							tag);
 	}
 }
 
@@ -42,7 +43,7 @@  void i915_emit_profiling_data(struct drm_i915_gem_request *req,
  * Emits the commands to capture OA perf report, into the Render CS
  */
 static void i915_oa_emit_perf_report(struct drm_i915_gem_request *req,
-				u32 global_ctx_id)
+				u32 global_ctx_id, u32 tag)
 {
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -71,6 +72,8 @@  static void i915_oa_emit_perf_report(struct drm_i915_gem_request *req,
 	entry->ctx_id = global_ctx_id;
 	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
 		entry->pid = current->pid;
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+		entry->tag = tag;
 	i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
 
 	spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
@@ -308,6 +311,7 @@  static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
 	u8 *snapshot, *current_ptr;
 	struct drm_i915_oa_node_ctx_id *ctx_info;
 	struct drm_i915_oa_node_pid *pid_info;
+	struct drm_i915_oa_node_tag *tag_info;
 	struct perf_raw_record raw;
 	u64 snapshot_ts;
 
@@ -326,6 +330,13 @@  static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
 		current_ptr = snapshot + snapshot_size;
 	}
 
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG) {
+		tag_info = (struct drm_i915_oa_node_tag *)current_ptr;
+		tag_info->tag = node->tag;
+		snapshot_size += sizeof(*tag_info);
+		current_ptr = snapshot + snapshot_size;
+	}
+
 	/* Flush the periodic snapshots till the ts of this OA report */
 	snapshot_ts = *(u64 *)(snapshot + 4);
 	flush_oa_snapshots(dev_priv, true, snapshot_ts);
@@ -686,6 +697,9 @@  static int init_oa_rcs_buffer(struct perf_event *event)
 	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
 		node_size += sizeof(struct drm_i915_oa_node_pid);
 
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+		node_size += sizeof(struct drm_i915_oa_node_tag);
+
 	/* node size has to be aligned to 64 bytes, since only 64 byte aligned
 	 * addresses can be given to OA unit for dumping OA reports */
 	node_size = ALIGN(node_size, 64);
@@ -841,6 +855,9 @@  static int i915_oa_event_init(struct perf_event *event)
 		if (oa_attr.sample_pid)
 			dev_priv->oa_pmu.sample_info_flags |=
 					I915_OA_SAMPLE_PID;
+		if (oa_attr.sample_tag)
+			dev_priv->oa_pmu.sample_info_flags |=
+					I915_OA_SAMPLE_TAG;
 	}
 
 	report_format = oa_attr.format;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 65e8297..abe5826 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -94,7 +94,8 @@  typedef struct _drm_i915_oa_attr {
 	__u64 single_context : 1,
 		multiple_context_mode:1,
 		sample_pid:1,
-		__reserved_1:61;
+		sample_tag:1,
+		__reserved_1:60;
 } drm_i915_oa_attr_t;
 
 /* Header for PERF_RECORD_DEVICE type events */
@@ -134,6 +135,11 @@  struct drm_i915_oa_node_pid {
 	__u32 pad;
 };
 
+struct drm_i915_oa_node_tag {
+	__u32 tag;	/* execbuffer tag copied from the OA RCS node */
+	__u32 pad;	/* presumably padding, as in drm_i915_oa_node_pid */
+};
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
@@ -848,6 +854,11 @@  struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
 	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
+/* Tag lives in the upper 32 bits of rsvd1; shift it down so it fits a u32 */
+#define I915_EXEC_TAG_MASK		(0xffffffff00000000ULL)
+#define i915_execbuffer2_get_tag(eb2) \
+	(((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;