@@ -2274,6 +2274,13 @@ struct i915_perf_cs_sample {
u32 ts_offset;
/**
+ * @mmio_offset: Offset into ``&stream->cs_buffer`` where the mmio reg
+ * values for this perf sample will be collected (if the stream is
+ * configured for collection of mmio data)
+ */
+ u32 mmio_offset;
+
+ /**
* @size: buffer size corresponding to this perf sample
*/
u32 size;
@@ -2751,6 +2758,9 @@ struct drm_i915_private {
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
} oa;
+
+ u32 num_mmio;
+ u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
} perf;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -29,6 +29,7 @@
#include "i915_gem.h"
#include "i915_sw_fence.h"
+#include <drm/i915_drm.h>
struct drm_file;
struct drm_i915_gem_object;
@@ -202,6 +203,8 @@ struct drm_i915_gem_request {
u32 tag;
u32 *pre_ts_offset;
u32 *post_ts_offset;
+ u32 *pre_mmio_offset[I915_PERF_MMIO_NUM_MAX];
+ u32 *post_mmio_offset[I915_PERF_MMIO_NUM_MAX];
};
extern const struct dma_fence_ops i915_fence_ops;
@@ -304,6 +304,7 @@ struct i915_perf_sample_data {
u64 tag;
u64 ts;
const u8 *report;
+ const u8 *mmio;
};
/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
@@ -361,6 +362,7 @@ struct i915_perf_sample_data {
#define SAMPLE_PID (1<<3)
#define SAMPLE_TAG (1<<4)
#define SAMPLE_TS (1<<5)
+#define SAMPLE_MMIO (1<<6)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -584,6 +586,53 @@ static int i915_emit_ts_capture(struct drm_i915_gem_request *request,
}
/**
+ * i915_emit_mmio_capture - Insert the commands to capture mmio
+ * data into the GPU command stream
+ * @request: request in whose context the mmio data being collected.
+ * @preallocate: allocate space in ring for related sample.
+ */
+static int i915_emit_mmio_capture(struct drm_i915_gem_request *request,
+				   bool preallocate)
+{
+	struct drm_i915_private *dev_priv = request->i915;
+	struct intel_engine_cs *engine = request->engine;
+	int i, num_mmio = engine->num_mmio;
+	u32 cmd, len, *cs;
+
+	/*
+	 * Each register capture emits 4 dwords: the SRM command, the
+	 * register offset, the destination address (patched later in
+	 * i915_perf_stream_patch_sample_mmio()) and one padding dword.
+	 * request->reserved_space is accounted in bytes, so reserve
+	 * 4 dwords * sizeof(u32) per register, not 4 bytes.
+	 */
+	len = 4 * sizeof(u32) * num_mmio;
+
+	if (preallocate)
+		request->reserved_space += len;
+	else
+		request->reserved_space -= len;
+
+	/* intel_ring_begin() takes a dword count */
+	cs = intel_ring_begin(request, 4 * num_mmio);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (INTEL_GEN(dev_priv) >= 8)
+		cmd = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	else
+		cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+
+	for (i = 0; i < num_mmio; i++) {
+		*cs++ = cmd;
+		*cs++ = engine->mmio_list[i];
+
+		/*
+		 * Remember where the destination address dword lives so it
+		 * can be patched once a sample slot is assigned.
+		 */
+		if (preallocate)
+			request->pre_mmio_offset[i] = cs++;
+		else
+			request->post_mmio_offset[i] = cs++;
+
+		/* Gen8+ SRM takes a 64b address: zero the high dword */
+		if (INTEL_GEN(dev_priv) >= 8)
+			*cs++ = 0;
+		else
+			*cs++ = MI_NOOP;
+	}
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+/**
* i915_perf_stream_emit_sample_capture - Insert the commands to capture perf
* metrics into the GPU command stream
* @stream: Stream to which this request corresponds.
@@ -622,6 +671,12 @@ static void i915_perf_stream_emit_sample_capture(
if (stream->sample_flags & SAMPLE_TAG)
request->tag = tag;
+ if (stream->sample_flags & SAMPLE_MMIO) {
+ ret = i915_emit_mmio_capture(request, preallocate);
+ if (ret)
+ DRM_ERROR("Emit of MMIO capture commands failed\n");
+ }
+
reservation_object_lock(resv, NULL);
if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &request->fence);
@@ -738,6 +793,29 @@ static void i915_perf_stream_patch_sample_ts(struct i915_perf_stream *stream,
}
}
+/*
+ * i915_perf_stream_patch_sample_mmio - fix up the destination addresses left
+ * as placeholders by i915_emit_mmio_capture(), now that @sample has been
+ * assigned a slot inside the stream's cs_buffer. Each register i is stored
+ * at 4-byte stride from the sample's mmio_offset.
+ *
+ * NOTE(review): mmio_addr narrows vma->node.start (u64) to u32 — assumes the
+ * cs_buffer is pinned in the low 4GiB of the GGTT; confirm against
+ * alloc_cs_buffer()'s pin flags.
+ */
+static void i915_perf_stream_patch_sample_mmio(struct i915_perf_stream *stream,
+ struct drm_i915_gem_request *request,
+ struct i915_perf_cs_sample *sample)
+{
+ int i, num_mmio = stream->engine->num_mmio;
+ u32 mmio_addr;
+
+ mmio_addr = stream->cs_buffer.vma->node.start + sample->mmio_offset;
+
+ /* Pre/post request captures were emitted with distinct placeholders */
+ switch (sample->id) {
+ case PRE_REQUEST_SAMPLE_ID:
+ for (i = 0; i < num_mmio; i++)
+ *request->pre_mmio_offset[i] = mmio_addr + 4*i;
+ break;
+ case POST_REQUEST_SAMPLE_ID:
+ for (i = 0; i < num_mmio; i++)
+ *request->post_mmio_offset[i] = mmio_addr + 4*i;
+ break;
+ default:
+ DRM_ERROR("Invalid sample being patched\n");
+ }
+}
+
/**
* i915_perf_stream_patch_request - Assign free sample. If none available,
* remove one. Patch offset of the perf sample address with the one from
@@ -776,6 +854,9 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
else if (stream->sample_flags & SAMPLE_TS)
i915_perf_stream_patch_sample_ts(stream, request,
sample);
+ if (stream->sample_flags & SAMPLE_MMIO)
+ i915_perf_stream_patch_sample_mmio(stream, request,
+ sample);
spin_unlock_irqrestore(&stream->samples_lock, flags);
sample_id++;
@@ -1109,6 +1190,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
buf += I915_PERF_TS_SAMPLE_SIZE;
}
+ if (sample_flags & SAMPLE_MMIO) {
+ if (copy_to_user(buf, data->mmio, 4 * stream->engine->num_mmio))
+ return -EFAULT;
+ buf += 4 * stream->engine->num_mmio;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -1158,6 +1245,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
u32 sample_flags = stream->sample_flags;
struct i915_perf_sample_data data = { 0 };
+ u32 mmio_list_dummy[I915_PERF_MMIO_NUM_MAX] = { 0 };
if (sample_flags & SAMPLE_OA_SOURCE)
data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER;
@@ -1175,6 +1263,10 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TS)
data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
+ /* Periodic OA samples don't have mmio associated with them */
+ if (sample_flags & SAMPLE_MMIO)
+ data.mmio = (u8 *)mmio_list_dummy;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1813,6 +1905,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
data.ts = gpu_ts;
}
+ if (sample_flags & SAMPLE_MMIO)
+ data.mmio = stream->cs_buffer.vaddr + node->mmio_offset;
+
return append_perf_sample(stream, buf, count, offset, &data);
}
@@ -2443,6 +2538,9 @@ static int init_perf_samples(struct i915_perf_stream *stream)
sample_size += I915_PERF_TS_SAMPLE_SIZE;
}
+ if (stream->sample_flags & SAMPLE_MMIO)
+ sample_size += 4 * stream->engine->num_mmio;
+
while ((offset + sample_size) < stream->cs_buffer.vma->size) {
sample = kzalloc(sizeof(*sample), GFP_KERNEL);
if (sample == NULL) {
@@ -2464,6 +2562,12 @@ static int init_perf_samples(struct i915_perf_stream *stream)
offset = sample->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
}
+ if (stream->sample_flags & SAMPLE_MMIO) {
+ sample->mmio_offset = offset;
+ offset = sample->mmio_offset +
+ 4 * stream->engine->num_mmio;
+ }
+
list_add_tail(&sample->link, &stream->free_samples);
}
@@ -3062,9 +3166,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE);
bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
- SAMPLE_TS);
+ SAMPLE_TS |
+ SAMPLE_MMIO);
bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
- SAMPLE_TAG);
+ SAMPLE_TAG |
+ SAMPLE_MMIO);
+
struct i915_perf_stream *curr_stream;
struct intel_engine_cs *engine = NULL;
int ret;
@@ -3240,7 +3347,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID/TAG/TS sampling requires engine "
+ DRM_ERROR("PID/TAG/TS/MMIO sampling requires engine "
"to be specified");
ret = -EINVAL;
goto err_enable;
@@ -3294,6 +3401,17 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
srcu_read_unlock(&engine->srcu, idx);
stream->engine = engine;
+
+	if (props->sample_flags & SAMPLE_MMIO) {
+		/*
+		 * memset length is in bytes: clear the whole u32 array, not
+		 * just the first I915_PERF_MMIO_NUM_MAX bytes of it.
+		 */
+		memset(engine->mmio_list, 0, sizeof(engine->mmio_list));
+		engine->num_mmio = dev_priv->perf.num_mmio;
+		memcpy(engine->mmio_list, dev_priv->perf.mmio_list,
+		       engine->num_mmio * sizeof(u32));
+
+		stream->sample_flags |= SAMPLE_MMIO;
+		stream->sample_size += engine->num_mmio * sizeof(u32);
+	}
+
ret = alloc_cs_buffer(stream);
if (ret)
goto err_enable;
@@ -3860,6 +3978,69 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
dev_priv->perf.oa.timestamp_frequency);
}
+/**
+ * check_mmio_whitelist - validate the user-requested mmio registers
+ * @dev_priv: i915 device instance
+ * @num_mmio: number of entries of dev_priv->perf.mmio_list to validate
+ *
+ * Only registers present in the whitelist, and supported on the running
+ * gen, may be sampled.
+ *
+ * Returns: 0 when all @num_mmio entries are whitelisted, -EINVAL otherwise.
+ */
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv, u32 num_mmio)
+{
+#define GEN_RANGE(l, h) GENMASK(h, l)
+	static const struct register_whitelist {
+		i915_reg_t mmio;
+		uint32_t size;
+		/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+		uint32_t gen_bitmask;
+	} whitelist[] = {
+		{ GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+		{ GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+	};
+/* Don't let the helper macro leak into the rest of the file */
+#undef GEN_RANGE
+	int i, count;
+
+	for (count = 0; count < num_mmio; count++) {
+		/* Coarse check on mmio reg addresses being non zero */
+		if (!dev_priv->perf.mmio_list[count])
+			return -EINVAL;
+
+		for (i = 0; i < ARRAY_SIZE(whitelist); i++) {
+			if ((i915_mmio_reg_offset(whitelist[i].mmio) ==
+			     dev_priv->perf.mmio_list[count]) &&
+			    (1 << INTEL_INFO(dev_priv)->gen &
+			     whitelist[i].gen_bitmask))
+				break;
+		}
+
+		/* Not in the whitelist, or not supported on this gen */
+		if (i == ARRAY_SIZE(whitelist))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * copy_mmio_list - copy in the user-supplied list of mmio registers
+ * @dev_priv: i915 device instance
+ * @mmio: userspace pointer to a struct drm_i915_perf_mmio_list
+ *
+ * Reads the register count and the register offsets from userspace into
+ * dev_priv->perf.{num_mmio,mmio_list} after validating them against the
+ * whitelist.
+ *
+ * Returns: 0 on success, negative error code otherwise.
+ */
+static int copy_mmio_list(struct drm_i915_private *dev_priv,
+			  void __user *mmio)
+{
+	void __user *mmio_list;
+	u32 num_mmio;
+	int ret;
+
+	if (!mmio)
+		return -EINVAL;
+
+	/* First dword of struct drm_i915_perf_mmio_list is num_mmio */
+	ret = get_user(num_mmio, (u32 __user *)mmio);
+	if (ret)
+		return ret;
+
+	/* Zero registers would make SAMPLE_MMIO meaningless */
+	if (!num_mmio || num_mmio > I915_PERF_MMIO_NUM_MAX)
+		return -EINVAL;
+
+	/* Register offsets follow immediately after the count */
+	mmio_list = (u8 __user *)mmio + sizeof(u32);
+
+	/* memset length is in bytes: clear the whole u32 array */
+	memset(dev_priv->perf.mmio_list, 0, sizeof(dev_priv->perf.mmio_list));
+	if (copy_from_user(dev_priv->perf.mmio_list, mmio_list,
+			   num_mmio * sizeof(u32)))
+		return -EFAULT;
+
+	ret = check_mmio_whitelist(dev_priv, num_mmio);
+	if (ret)
+		return ret;
+
+	dev_priv->perf.num_mmio = num_mmio;
+
+	return 0;
+}
+
/**
* read_properties_unlocked - validate + copy userspace stream open properties
* @dev_priv: i915 device instance
@@ -4012,6 +4193,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_TS:
props->sample_flags |= SAMPLE_TS;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_MMIO:
+ ret = copy_mmio_list(dev_priv, (u64 __user *)value);
+ if (ret)
+ return ret;
+ props->sample_flags |= SAMPLE_MMIO;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -449,6 +449,9 @@ struct intel_engine_cs {
*/
struct i915_perf_stream __rcu *exclusive_stream;
struct srcu_struct srcu;
+
+ u32 num_mmio;
+ u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
};
static inline unsigned int
@@ -1393,6 +1393,12 @@ enum drm_i915_perf_sample_oa_source {
I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */
};
+#define I915_PERF_MMIO_NUM_MAX 8
+/**
+ * struct drm_i915_perf_mmio_list - user-supplied registers to sample with
+ * DRM_I915_PERF_PROP_SAMPLE_MMIO.
+ * @num_mmio: number of valid entries in @mmio_list
+ *            (at most I915_PERF_MMIO_NUM_MAX)
+ * @mmio_list: mmio register offsets to capture with each perf sample
+ */
+struct drm_i915_perf_mmio_list {
+ __u32 num_mmio;
+ __u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+};
+
enum drm_i915_perf_property_id {
/**
* Open the stream for a specific context handle (as used with
@@ -1465,6 +1471,13 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_TS,
+ /**
+ * This property requests inclusion of mmio register values in the perf
+ * sample data. The value of this property specifies the address of user
+ * struct having the register addresses.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_MMIO,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1535,6 +1548,7 @@ enum drm_i915_perf_record_type {
* { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
* { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u64 timestamp; } && DRM_I915_PERF_PROP_SAMPLE_TS
+ * { u32 mmio[]; } && DRM_I915_PERF_PROP_SAMPLE_MMIO
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/