diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1465,11 +1465,9 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
struct intel_engine_execlists * const execlists = &engine->execlists;
bool tasklet = false;
- if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
- if (READ_ONCE(engine->execlists.active))
- tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted);
- }
+ if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
+ tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
+ &engine->irq_posted);
if (iir & GT_RENDER_USER_INTERRUPT) {
notify_ring(engine);
@@ -1477,7 +1475,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
if (tasklet)
- tasklet_hi_schedule(&execlists->tasklet);
+ execlists_tasklet(execlists);
}
static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -717,6 +717,22 @@ execlists_port_complete(struct intel_engine_execlists * const execlists,
return port;
}
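+/*
+ * Run the submission tasklet inline if it is not already executing on
+ * another cpu, avoiding the softirq/ksoftirqd wakeup latency;
+ * otherwise kick it along the normal softirq path.
+ */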
+static inline void
+execlists_tasklet(struct intel_engine_execlists * const execlists)
+{
+ if (tasklet_trylock(&execlists->tasklet)) {
+ execlists->tasklet.func(execlists->tasklet.data);
+ tasklet_unlock(&execlists->tasklet);
+ } else {
+ tasklet_hi_schedule(&execlists->tasklet);
+ }
+}
+
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
Continuing the theme of bypassing ksoftirqd latency, also first try to
directly submit from the CS interrupt handler to clear the ELSP and
queue the next. In the past, we have been hesitant to do this as the
context switch processing has been quite heavy, requiring forcewaked
mmio. However, as we can now read the GPU state from the cacheable
HWSP, it is relatively cheap!

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c         | 10 ++++------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 6 deletions(-)
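For reference, the trylock-or-schedule idiom introduced by
execlists_tasklet() boils down to the standalone sketch below of a
hypothetical driver: the my_* names are invented for illustration,
while tasklet_trylock(), tasklet_unlock() and tasklet_hi_schedule()
are the real <linux/interrupt.h> primitives (pre-5.9 tasklet API, as
used by this tree):

    #include <linux/interrupt.h>

    static void my_bottom_half(unsigned long data)
    {
            /* deferred work, normally run from softirq context */
    }

    static DECLARE_TASKLET(my_tasklet, my_bottom_half, 0);

    static irqreturn_t my_irq_handler(int irq, void *dev_id)
    {
            if (tasklet_trylock(&my_tasklet)) {
                    /* claimed the RUN bit: execute directly here,
                     * skipping the softirq/ksoftirqd round-trip */
                    my_tasklet.func(my_tasklet.data);
                    tasklet_unlock(&my_tasklet);
            } else {
                    /* already running on another cpu: reschedule so
                     * the work is picked up once it has finished */
                    tasklet_hi_schedule(&my_tasklet);
            }

            return IRQ_HANDLED;
    }

Note that tasklet_trylock() only excludes concurrent execution: if the
tasklet was also scheduled, it may run once more from softirq
afterwards, so the callback (like the execlists tasklet) has to
tolerate being invoked with nothing to do.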