@@ -77,6 +77,12 @@ config DRM_I915_TRACE_GEM
If in doubt, say "N".
+config DRM_I915_TRACE_IRQ_LATENCY
+ bool "Measure interrupt-to-client-wakeup"
+ depends on DRM_I915_DEBUG_GEM
+ default n
+ select FTRACE
+
config DRM_I915_SW_FENCE_DEBUG_OBJECTS
bool "Enable additional driver debugging for fence objects"
depends on DRM_I915
@@ -1401,12 +1401,12 @@ long i915_request_wait(struct i915_request *rq,
for (;;) {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
- break;
+ goto remove;
}
if (!timeout) {
timeout = -ETIME;
- break;
+ goto remove;
}
if (!qos &&
@@ -1457,6 +1457,16 @@ long i915_request_wait(struct i915_request *rq,
}
}
+ if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+ struct drm_i915_private *dev_priv = rq->i915;
+ u32 now = I915_READ(RING_TIMESTAMP(rq->engine->mmio_base));
+
+ trace_i915_wait_irq_latency(rq,
+ now - intel_engine_get_timestamp(rq->engine),
+ rq->global_seqno == intel_engine_get_seqno(rq->engine));
+ }
+
+remove:
intel_engine_remove_wait(rq->engine, &wait);
complete:
__set_current_state(TASK_RUNNING);
@@ -849,6 +849,51 @@ TRACE_EVENT(i915_request_wait_begin,
__entry->flags)
);
+#if defined(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)
+TRACE_EVENT(i915_wait_irq_latency,
+ TP_PROTO(struct i915_request *rq, u32 delay, bool stable),
+ TP_ARGS(rq, delay, stable),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u32, hw_id)
+ __field(u64, ctx)
+ __field(u16, class)
+ __field(u16, instance)
+ __field(u32, seqno)
+ __field(u32, global)
+ __field(u32, delay)
+ __field(u32, time)
+ __field(u32, stable)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->i915->drm.primary->index;
+ __entry->hw_id = rq->gem_context->hw_id;
+ __entry->class = rq->engine->uabi_class;
+ __entry->instance = rq->engine->instance;
+ __entry->ctx = rq->fence.context;
+ __entry->seqno = rq->fence.seqno;
+ __entry->global = rq->global_seqno;
+ __entry->delay = delay;
+ __entry->stable = stable;
+ ),
+
+ TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, delay=%u%s",
+ __entry->dev, __entry->class, __entry->instance,
+ __entry->hw_id, __entry->ctx,
+ __entry->seqno, __entry->global,
+ __entry->delay, __entry->stable ? "" : "*")
+);
+#else
+#if !defined(TRACE_HEADER_MULTI_READ)
+static inline void
+trace_i915_wait_irq_latency(struct i915_request *rq, u32 delta, bool stable)
+{
+}
+#endif
+#endif
+
DEFINE_EVENT(i915_request, i915_request_wait_end,
TP_PROTO(struct i915_request *rq),
TP_ARGS(rq)
@@ -2213,14 +2213,25 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
cs = gen8_emit_ggtt_write(cs, request->global_seqno,
intel_hws_seqno_address(request->engine));
+
+ if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(request->engine->mmio_base));
+ *cs++ = intel_hws_timestamp_address(request->engine);
+ *cs++ = MI_NOOP;
+ }
+
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz =
+ 6 + (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY) ? 4 : 0) +
+ WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
@@ -2229,14 +2240,25 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
intel_hws_seqno_address(request->engine));
+
+ if (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY)) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(request->engine->mmio_base));
+ *cs++ = intel_hws_timestamp_address(request->engine);
+ *cs++ = MI_NOOP;
+ }
+
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_rcs_sz =
+ 8 + (IS_ENABLED(CONFIG_DRM_I915_TRACE_IRQ_LATENCY) ? 4 : 0) +
+ WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct i915_request *rq)
{
@@ -786,6 +786,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX 0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_TIMESTAMP_INDEX 0x34
+#define I915_GEM_HWS_TIMESTAMP_ADDR (I915_GEM_HWS_TIMESTAMP_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
@@ -916,6 +918,11 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}
+static inline u32 intel_engine_get_timestamp(struct intel_engine_cs *engine)
+{
+ return intel_read_status_page(engine, I915_GEM_HWS_TIMESTAMP_INDEX);
+}
+
static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
/* We are only peeking at the tail of the submit queue (and not the
@@ -950,6 +957,11 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}
+static inline u32 intel_hws_timestamp_address(struct intel_engine_cs *engine)
+{
+ return engine->status_page.ggtt_offset + I915_GEM_HWS_TIMESTAMP_ADDR;
+}
+
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
If we record the time we send the interrupt, and then the time we wake the client up, we can measure that wake up latency. This can be very useful for detecting issues and in particular whether the CPU power management is accidentally delaying clients from their work. To use: perf record -e i915:i915_wait_irq_latency perf script However, using perf/tracepoints seems to be preventing CPU power management; completely obliterating the effect we are trying to investigate. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/Kconfig.debug | 6 ++++ drivers/gpu/drm/i915/i915_request.c | 14 ++++++-- drivers/gpu/drm/i915/i915_trace.h | 45 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++++++ 5 files changed, 99 insertions(+), 4 deletions(-)