@@ -1951,6 +1951,7 @@ struct drm_i915_private {
struct intel_context *pinned_ctx;
u32 specific_ctx_id;
+ u32 specific_ctx_id_mask;
struct hrtimer poll_check_timer;
wait_queue_head_t poll_wq;
@@ -737,12 +737,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
continue;
}
- /*
- * XXX: Just keep the lower 21 bits for now since I'm not
- * entirely sure if the HW touches any of the higher bits in
- * this field
- */
- ctx_id = report32[2] & 0x1fffff;
+ ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -1203,6 +1198,36 @@ static int i915_oa_read(struct i915_perf_stream *stream,
return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}
+static int oa_get_render_lrca(struct drm_i915_private *i915,
+ struct i915_gem_context *ctx,
+ u32 *lrca)
+{
+ struct intel_engine_cs *engine = i915->engine[RCS];
+ struct intel_context *ce;
+ int ret;
+
+ ret = i915_mutex_lock_interruptible(&i915->drm);
+ if (ret)
+ return ret;
+
+ /*
+ * As the ID is the gtt offset of the context's vma we
+ * pin the vma to ensure the ID remains fixed.
+ *
+ * NB: implied RCS engine...
+ */
+ ce = intel_context_pin(ctx, engine);
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ i915->perf.oa.pinned_ctx = ce;
+
+ *lrca = i915_ggtt_offset(ce->state);
+
+ return 0;
+}
+
/**
* oa_get_render_ctx_id - determine and hold ctx hw id
* @stream: An i915-perf stream opened for OA metrics
@@ -1215,40 +1240,81 @@ static int i915_oa_read(struct i915_perf_stream *stream,
*/
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct drm_i915_private *i915 = stream->dev_priv;
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
- dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
- } else {
- struct intel_engine_cs *engine = dev_priv->engine[RCS];
- struct intel_context *ce;
+ switch (INTEL_GEN(i915)) {
+ case 7: {
int ret;
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+ ret = oa_get_render_lrca(i915, stream->ctx,
+ &i915->perf.oa.specific_ctx_id);
if (ret)
return ret;
/*
- * As the ID is the gtt offset of the context's vma we
- * pin the vma to ensure the ID remains fixed.
- *
- * NB: implied RCS engine...
+ * On Haswell we don't do any post processing of the reports
+ * and don't need to use the mask.
*/
- ce = intel_context_pin(stream->ctx, engine);
- mutex_unlock(&dev_priv->drm.struct_mutex);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
+ i915->perf.oa.specific_ctx_id_mask = 0;
+ break;
+ }
- dev_priv->perf.oa.pinned_ctx = ce;
+ case 8:
+ case 9:
+ case 10:
+ if (USES_GUC_SUBMISSION(i915)) {
+ u32 lrca;
+ int ret;
- /*
- * Explicitly track the ID (instead of calling
- * i915_ggtt_offset() on the fly) considering the difference
- * with gen8+ and execlists
- */
- dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
+ ret = oa_get_render_lrca(i915, stream->ctx, &lrca);
+ if (ret)
+ return ret;
+
+ /*
+ * The LRCA is aligned to a page. As a result the
+ * lower 12bits are always at 0 and reused in the
+ * context descriptor for some flags. They won't be
+ * part of the context ID in the OA reports, so squash
+ * those lower bits.
+ */
+ i915->perf.oa.specific_ctx_id =
+ (lrca + LRC_HEADER_PAGES * PAGE_SIZE) >> 12;
+
+ /*
+ * GuC uses the top bit to signal proxy submission, so
+ * ignore that bit if using GuC.
+ */
+ i915->perf.oa.specific_ctx_id_mask =
+ (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+ } else {
+ i915->perf.oa.specific_ctx_id = stream->ctx->hw_id;
+ i915->perf.oa.specific_ctx_id_mask =
+ (1U << GEN8_CTX_ID_WIDTH) - 1;
+ }
+ break;
+
+ case 11: {
+ struct intel_engine_cs *engine = i915->engine[RCS];
+
+ i915->perf.oa.specific_ctx_id =
+ stream->ctx->hw_id << (GEN11_SW_CTX_ID_SHIFT - 32) |
+ engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+ engine->class << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+ i915->perf.oa.specific_ctx_id_mask =
+ ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
+ ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+ ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
+ break;
}
+ default:
+ MISSING_CASE(INTEL_GEN(i915));
+ }
+
+ DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
+ i915->perf.oa.specific_ctx_id,
+ i915->perf.oa.specific_ctx_id_mask);
+
return 0;
}
@@ -1265,6 +1331,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
struct intel_context *ce;
dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+ dev_priv->perf.oa.specific_ctx_id_mask = 0;
ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
if (ce) {
@@ -233,6 +233,11 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
/* bits 12-31 */
GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
+ /*
+ * The following 32bits are copied into the OA reports (dword 2).
+ * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
+ * anything below.
+ */
if (INTEL_GEN(ctx->i915) >= 11) {
GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
One thing we didn't really understand about the OA report is that the ContextID field (dword 2) is copy of the context descriptor (dword 1). On Gen8->10 and without using GuC we didn't notice the issue because we only checked the 21bits of the ContextID field in the OA reports which matches exactly the hw_id stored into the context descriptor. When using GuC submission we have an issue of a non matching hw_id because GuC uses bit 20 of the hw_id to signal proxy submission. This change introduces a mask to compare only the relevant bits. On ICL the context descriptor format has changed and we failed to address this. On top of using a mask we also need to shift the bits properly. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252 BSpec: 1237 Testcase: igt/perf/gen8-unprivileged-single-ctx-counters --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 123 ++++++++++++++++++++++++------- drivers/gpu/drm/i915/intel_lrc.c | 5 ++ 3 files changed, 101 insertions(+), 28 deletions(-)