@@ -1996,7 +1996,8 @@ struct i915_perf_stream_ops {
*/
void (*emit_sample_capture)(struct i915_perf_stream *stream,
struct drm_i915_gem_request *request,
- bool preallocate);
+ bool preallocate,
+ u32 tag);
};
enum i915_perf_stream_state {
@@ -2080,6 +2081,7 @@ struct i915_perf_stream {
u32 last_ctx_id;
u32 last_pid;
+ u32 last_tag;
};
/**
@@ -2196,6 +2198,17 @@ struct i915_perf_cs_sample {
* submitted, pertaining to this perf sample
*/
u32 pid;
+
+ /**
+ * @tag: Tag associated with the workload for which the perf sample is
+ * being collected.
+ *
+ * Userspace can specify tags (provided via execbuffer ioctl), which
+ * can be associated with the perf samples, and be used to functionally
+ * distinguish different workload stages, and associate samples with
+ * these different stages.
+ */
+ u32 tag;
};
struct intel_cdclk_state {
@@ -3723,7 +3736,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct i915_gem_context *ctx,
uint32_t *reg_state);
void i915_perf_emit_sample_capture(struct drm_i915_gem_request *req,
- bool preallocate);
+ bool preallocate,
+ u32 tag);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
@@ -242,6 +242,7 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+ uint32_t tag;
};
/*
@@ -1194,7 +1195,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_perf_emit_sample_capture(rq, true);
+ i915_perf_emit_sample_capture(rq, true, eb->tag);
err = eb->engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
@@ -1202,7 +1203,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_perf_emit_sample_capture(rq, false);
+ i915_perf_emit_sample_capture(rq, false, eb->tag);
GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
i915_vma_move_to_active(batch, rq, 0);
@@ -2033,7 +2034,7 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
- i915_perf_emit_sample_capture(eb->request, true);
+ i915_perf_emit_sample_capture(eb->request, true, eb->tag);
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
@@ -2043,7 +2044,7 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
- i915_perf_emit_sample_capture(eb->request, false);
+ i915_perf_emit_sample_capture(eb->request, false, eb->tag);
return 0;
}
@@ -2168,6 +2169,8 @@ static int eb_submit(struct i915_execbuffer *eb)
if (!eb.engine)
return -EINVAL;
+ eb.tag = i915_execbuffer2_get_tag(*args);
+
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
if (!HAS_RESOURCE_STREAMER(eb.i915)) {
DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
@@ -294,6 +294,7 @@ struct i915_perf_sample_data {
u64 source;
u64 ctx_id;
u64 pid;
+ u64 tag;
const u8 *report;
};
@@ -350,6 +351,7 @@ struct i915_perf_sample_data {
#define SAMPLE_OA_SOURCE (1<<1)
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
+#define SAMPLE_TAG (1<<4)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -402,12 +404,14 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
* the command stream of a GPU engine.
* @request: request in whose context the metrics are being collected.
* @preallocate: allocate space in ring for related sample.
+ * @tag: userspace-provided tag to be associated with the perf sample
*
* The function provides a hook through which the commands to capture perf
* metrics, are inserted into the command stream of a GPU engine.
*/
void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
- bool preallocate)
+ bool preallocate,
+ u32 tag)
{
struct intel_engine_cs *engine = request->engine;
struct drm_i915_private *dev_priv = engine->i915;
@@ -422,7 +426,8 @@ void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
if (stream && (stream->state == I915_PERF_STREAM_ENABLED) &&
stream->cs_mode)
stream->ops->emit_sample_capture(stream, request,
- preallocate);
+ preallocate, tag);
+
srcu_read_unlock(&engine->perf_srcu, idx);
}
@@ -591,11 +596,13 @@ static int i915_emit_oa_report_capture(
* @stream: An i915-perf stream opened for GPU metrics
* @request: request in whose context the metrics are being collected.
* @preallocate: allocate space in ring for related sample.
+ * @tag: userspace-provided tag to be associated with the perf sample
*/
static void i915_perf_stream_emit_sample_capture(
struct i915_perf_stream *stream,
struct drm_i915_gem_request *request,
- bool preallocate)
+ bool preallocate,
+ u32 tag)
{
struct reservation_object *resv = stream->cs_buffer.vma->resv;
struct i915_perf_cs_sample *sample;
@@ -611,6 +618,7 @@ static void i915_perf_stream_emit_sample_capture(
sample->request = i915_gem_request_get(request);
sample->ctx_id = request->ctx->hw_id;
sample->pid = current->pid;
+ sample->tag = tag;
insert_perf_sample(stream, sample);
@@ -933,6 +941,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
buf += 8;
}
+ if (sample_flags & SAMPLE_TAG) {
+ if (copy_to_user(buf, &data->tag, 8))
+ return -EFAULT;
+ buf += 8;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -973,6 +987,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = stream->last_pid;
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = stream->last_tag;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1575,6 +1592,11 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
stream->last_pid = node->pid;
}
+ if (sample_flags & SAMPLE_TAG) {
+ data.tag = node->tag;
+ stream->last_tag = node->tag;
+ }
+
return append_perf_sample(stream, buf, count, offset, &data);
}
@@ -2736,7 +2758,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE);
- bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+ bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+ SAMPLE_TAG);
bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
struct i915_perf_stream *curr_stream;
struct intel_engine_cs *engine = NULL;
@@ -2895,7 +2918,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID sampling requires a ring to be specified");
+ DRM_ERROR("PID/TAG sampling requires a ring to be specified");
ret = -EINVAL;
goto err_enable;
}
@@ -2924,6 +2947,11 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 8;
}
+ if (props->sample_flags & SAMPLE_TAG) {
+ stream->sample_flags |= SAMPLE_TAG;
+ stream->sample_size += 8;
+ }
+
engine = dev_priv->engine[props->engine];
idx = srcu_read_lock(&engine->perf_srcu);
@@ -3641,6 +3669,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_PID:
props->sample_flags |= SAMPLE_PID;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_TAG:
+ props->sample_flags |= SAMPLE_TAG;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -935,6 +935,11 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* The upper 32 bits of the rsvd1 field contain the workload tag */
+#define I915_EXEC_TAG_MASK (0xffffffff00000000UL)
+#define i915_execbuffer2_get_tag(eb2) \
+ (((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
@@ -1414,6 +1419,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_PID,
+ /**
+ * When set to 1, this property requests inclusion of the workload tag
+ * in the perf sample data.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_TAG,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1482,6 +1493,7 @@ enum drm_i915_perf_record_type {
* { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
* { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
+ * { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/