@@ -1705,6 +1705,7 @@ struct i915_execbuffer_params {
struct drm_i915_gem_object *batch_obj;
struct intel_context *ctx;
struct drm_i915_gem_request *request;
+ uint32_t tag;
};
/* used in computing the new watermarks state */
@@ -1805,7 +1806,7 @@ struct i915_perf_stream {
* Routine to emit the commands in the command streamer associated
* with the corresponding gpu engine.
*/
- void (*command_stream_hook)(struct drm_i915_gem_request *req);
+ void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag);
};
struct i915_oa_ops {
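As an illustration of the hook interface above, a stream implementation now receives both the request and the userspace-supplied tag around batch submission. The function below is a hypothetical stub, not part of this patch; the real OA hook (i915_perf_command_stream_hook_oa, updated later in this patch) records the tag in its command stream data node.

/* Hypothetical hook, shown only to illustrate the new callback signature. */
static void example_command_stream_hook(struct drm_i915_gem_request *req,
					u32 tag)
{
	/*
	 * A stream would capture whatever per-request state it needs here,
	 * e.g. emit a report into its command stream buffer and remember
	 * (req, tag) so the completed sample can carry the tag.
	 */
}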
@@ -1837,6 +1838,7 @@ struct i915_perf_cs_data_node {
u32 offset;
u32 ctx_id;
u32 pid;
+ u32 tag;
};
struct drm_i915_private {
@@ -2179,6 +2181,7 @@ struct drm_i915_private {
u32 last_ctx_id;
u32 last_pid;
+ u32 last_tag;
struct list_head node_list;
spinlock_t node_list_lock;
} perf;
@@ -3548,7 +3551,7 @@ void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req);
void i915_oa_update_reg_state(struct intel_engine_cs *engine,
struct intel_context *ctx,
uint32_t *reg_state);
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *req);
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct drm_device *dev,
@@ -1313,7 +1313,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
if (exec_len == 0)
exec_len = params->batch_obj->base.size;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
ret = engine->dispatch_execbuffer(params->request,
exec_start, exec_len,
@@ -1321,7 +1321,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
if (ret)
return ret;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
@@ -1642,6 +1642,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->batch_obj = batch_obj;
params->ctx = ctx;
params->request = req;
+ params->tag = i915_execbuffer2_get_tag(*args);
ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
err_request:
@@ -88,6 +88,7 @@ struct oa_sample_data {
u32 source;
u32 ctx_id;
u32 pid;
+ u32 tag;
const u8 *report;
};
@@ -134,6 +135,7 @@ static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
#define SAMPLE_OA_SOURCE_INFO (1<<1)
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
+#define SAMPLE_TAG (1<<4)
struct perf_open_properties {
u32 sample_flags;
@@ -158,7 +160,7 @@ struct perf_open_properties {
* perf mutex lock.
*/
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *req)
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag)
{
struct intel_engine_cs *engine = req->engine;
struct drm_i915_private *dev_priv = engine->dev->dev_private;
@@ -170,7 +172,7 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *req)
mutex_lock(&dev_priv->perf.streams_lock);
list_for_each_entry(stream, &dev_priv->perf.streams, link) {
if (stream->enabled && stream->command_stream_hook)
- stream->command_stream_hook(req);
+ stream->command_stream_hook(req, tag);
}
mutex_unlock(&dev_priv->perf.streams_lock);
}
@@ -284,7 +286,8 @@ out_unlock:
return ret;
}
-static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req)
+static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
+ u32 tag)
{
struct intel_engine_cs *engine = req->engine;
struct intel_ringbuffer *ringbuf = req->ringbuf;
@@ -316,6 +319,7 @@ static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req)
entry->ctx_id = ctx->global_id;
entry->pid = current->pid;
+ entry->tag = tag;
i915_gem_request_assign(&entry->request, req);
addr = dev_priv->perf.command_stream_buf.vma->node.start +
@@ -582,6 +586,12 @@ static int append_oa_sample(struct i915_perf_stream *stream,
buf += 4;
}
+ if (sample_flags & SAMPLE_TAG) {
+ if (copy_to_user(buf, &data->tag, 4))
+ return -EFAULT;
+ buf += 4;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -627,6 +637,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = dev_priv->perf.last_pid;
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = dev_priv->perf.last_tag;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1060,6 +1073,11 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream,
dev_priv->perf.last_pid = node->pid;
}
+ if (sample_flags & SAMPLE_TAG) {
+ data.tag = node->tag;
+ dev_priv->perf.last_tag = node->tag;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1850,7 +1868,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE_INFO);
- bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+ bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+ SAMPLE_TAG);
bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
int ret;
@@ -1988,7 +2007,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID sampling requires a ring to be specified");
+ DRM_ERROR("PID or TAG sampling require a ring to be specified");
ret = -EINVAL;
goto cs_error;
}
@@ -2021,6 +2040,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 4;
}
+ if (props->sample_flags & SAMPLE_TAG) {
+ stream->sample_flags |= SAMPLE_TAG;
+ stream->sample_size += 4;
+ }
+
ret = alloc_command_stream_buf(dev_priv);
if (ret)
goto cs_error;
@@ -2677,6 +2701,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_PID:
props->sample_flags |= SAMPLE_PID;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_TAG:
+ props->sample_flags |= SAMPLE_TAG;
+ break;
case DRM_I915_PERF_PROP_MAX:
BUG();
}
@@ -1002,13 +1002,13 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
args->batch_start_offset;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags);
if (ret)
return ret;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
@@ -796,6 +796,11 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* upper 32 bits of rsvd1 field contain tag */
+#define I915_EXEC_TAG_MASK (0xffffffff00000000UL)
+#define i915_execbuffer2_get_tag(eb2) \
+ (((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
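Given the mask and getter above, userspace packs the tag into the upper 32 bits of rsvd1 while the context id continues to occupy the lower 32 bits. A minimal sketch; the helper name pack_execbuf_rsvd1() is hypothetical.

/* Hypothetical userspace helper illustrating the rsvd1 layout. */
static inline __u64 pack_execbuf_rsvd1(__u32 ctx_id, __u32 tag)
{
	return ((__u64)tag << 32) | (__u64)ctx_id;
}

/* e.g. execbuf.rsvd1 = pack_execbuf_rsvd1(ctx_id, frame_number); */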
@@ -1261,6 +1266,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_PID,
+ /**
+ * The value of this property set to 1 requests inclusion of the tag in the
+ * perf sample data. The tag is the value userspace supplies in the upper
+ * 32 bits of the execbuffer2 rsvd1 field.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_TAG,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
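A hedged userspace sketch of requesting tag samples at stream open time. The property list is a flat array of (id, value) u64 pairs handed to DRM_IOCTL_I915_PERF_OPEN; drm_i915_perf_open_param, I915_PERF_FLAG_FD_CLOEXEC and drmIoctl() are taken from the i915 perf interface this series builds on, while the OA metrics set, OA format and ring-selection properties required elsewhere in the series are elided, and drm_fd / open_tagged_perf_stream() are assumed names.

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Sketch only: open an i915 perf stream whose samples include the tag. */
static int open_tagged_perf_stream(int drm_fd)
{
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_SAMPLE_TAG, 1,
		/* ... metrics set, OA format, ring for CS mode ... */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)properties,
	};

	return drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}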
@@ -1328,6 +1339,7 @@ enum drm_i915_perf_record_type {
* { u32 source_info; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
* { u32 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u32 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
+ * { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/
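Correspondingly, a reader walks the records returned by read(2) on the stream fd and picks fields out of each sample according to the properties it requested at open time. A minimal sketch, assuming only SAMPLE_TAG and SAMPLE_OA were requested so the tag is the first u32 after the record header, and using the series' drm_i915_perf_record_header layout.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Sketch only: extract the tag from each sample record in a read() buffer. */
static void print_sample_tags(const uint8_t *buf, size_t len)
{
	size_t offset = 0;

	while (offset + sizeof(struct drm_i915_perf_record_header) <= len) {
		const struct drm_i915_perf_record_header *hdr =
			(const void *)(buf + offset);

		if (hdr->size < sizeof(*hdr) || offset + hdr->size > len)
			break;

		if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE) {
			uint32_t tag;

			memcpy(&tag, buf + offset + sizeof(*hdr), sizeof(tag));
			printf("sample tag: %u\n", tag);
		}

		offset += hdr->size;
	}
}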