Message ID | 20180115212455.24046-9-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/01/2018 21:24, Chris Wilson wrote: > Rather than have multiple locked instructions inside the notify_ring() > irq handler, move them inside the spinlock and reduce their intrinsic > locking. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- > drivers/gpu/drm/i915/i915_irq.c | 6 +++--- > drivers/gpu/drm/i915/intel_breadcrumbs.c | 13 ++++++++----- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- > 4 files changed, 14 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index 836db90ef81b..08bbd56277e5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -1128,7 +1128,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req, > * takes to sleep on a request, on the order of a microsecond. > */ > > - irq = atomic_read(&engine->irq_count); > + irq = READ_ONCE(engine->breadcrumbs.irq_count); > timeout_us += local_clock_us(&cpu); > do { > if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) > @@ -1139,7 +1139,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req, > * assume we won't see one in the near future but require > * the engine->seqno_barrier() to fixup coherency. 
> */ > - if (atomic_read(&engine->irq_count) != irq) > + if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) > break; > > if (signal_pending_state(state, current)) > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 0b272501b738..e5f76d580010 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1073,9 +1073,6 @@ static void notify_ring(struct intel_engine_cs *engine) > if (unlikely(!engine->breadcrumbs.irq_armed)) > return; > > - atomic_inc(&engine->irq_count); > - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); > - > rcu_read_lock(); > > spin_lock(&engine->breadcrumbs.irq_lock); > @@ -1107,6 +1104,9 @@ static void notify_ring(struct intel_engine_cs *engine) > i915_seqno_passed(seqno, wait->seqno - 1)) > tsk = wait->tsk; > } > + > + engine->breadcrumbs.irq_count++; > + __set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); I'm nervous about moving the ENGINE_IRQ_BREADCRUMB setting to be conditional. __i915_request_irq_complete documents the ordering of these things is crucial so I worry we might miss a wakeup. Once bitten twice shy? Don't know.. irq_count change looks safe, so can I, once again, suggest to split into two patches? 
:/ Regards, Tvrtko > } else { > if (engine->breadcrumbs.irq_armed) > __intel_engine_disarm_breadcrumbs(engine); > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c > index e3667dc1e96d..7c82cfe23922 100644 > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c > @@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) > struct intel_engine_cs *engine = > from_timer(engine, t, breadcrumbs.hangcheck); > struct intel_breadcrumbs *b = &engine->breadcrumbs; > + unsigned int irq_count; > > if (!b->irq_armed) > return; > > - if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { > - b->hangcheck_interrupts = atomic_read(&engine->irq_count); > + irq_count = READ_ONCE(b->irq_count); > + if (b->hangcheck_interrupts != irq_count) { > + b->hangcheck_interrupts = irq_count; > mod_timer(&b->hangcheck, wait_timeout()); > return; > } > @@ -176,7 +178,7 @@ static void irq_enable(struct intel_engine_cs *engine) > * we still need to force the barrier before reading the seqno, > * just in case. > */ > - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); > + __set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); > > /* Caller disables interrupts */ > spin_lock(&engine->i915->irq_lock); > @@ -270,13 +272,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) > if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) > return false; > > - /* Only start with the heavy weight fake irq timer if we have not > + /* > + * Only start with the heavy weight fake irq timer if we have not > * seen any interrupts since enabling it the first time. If the > * interrupts are still arriving, it means we made a mistake in our > * engine->seqno_barrier(), a timing error that should be transient > * and unlikely to reoccur. 
> */ > - return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; > + return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; > } > > static void enable_fake_irq(struct intel_breadcrumbs *b) > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index c5ff203e42d6..f406d0ff4612 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -305,7 +305,6 @@ struct intel_engine_cs { > > struct drm_i915_gem_object *default_state; > > - atomic_t irq_count; > unsigned long irq_posted; > #define ENGINE_IRQ_BREADCRUMB 0 > #define ENGINE_IRQ_EXECLIST 1 > @@ -340,6 +339,7 @@ struct intel_engine_cs { > > unsigned int hangcheck_interrupts; > unsigned int irq_enabled; > + unsigned int irq_count; > > bool irq_armed : 1; > I915_SELFTEST_DECLARE(bool mock : 1); >
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 836db90ef81b..08bbd56277e5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1128,7 +1128,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. */ - irq = atomic_read(&engine->irq_count); + irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) @@ -1139,7 +1139,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req, * assume we won't see one in the near future but require * the engine->seqno_barrier() to fixup coherency. */ - if (atomic_read(&engine->irq_count) != irq) + if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) break; if (signal_pending_state(state, current)) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0b272501b738..e5f76d580010 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1073,9 +1073,6 @@ static void notify_ring(struct intel_engine_cs *engine) if (unlikely(!engine->breadcrumbs.irq_armed)) return; - atomic_inc(&engine->irq_count); - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - rcu_read_lock(); spin_lock(&engine->breadcrumbs.irq_lock); @@ -1107,6 +1104,9 @@ static void notify_ring(struct intel_engine_cs *engine) i915_seqno_passed(seqno, wait->seqno - 1)) tsk = wait->tsk; } + + engine->breadcrumbs.irq_count++; + __set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); } else { if (engine->breadcrumbs.irq_armed) __intel_engine_disarm_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index e3667dc1e96d..7c82cfe23922 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -98,12 +98,14 @@ static void 
intel_breadcrumbs_hangcheck(struct timer_list *t) struct intel_engine_cs *engine = from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned int irq_count; if (!b->irq_armed) return; - if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { - b->hangcheck_interrupts = atomic_read(&engine->irq_count); + irq_count = READ_ONCE(b->irq_count); + if (b->hangcheck_interrupts != irq_count) { + b->hangcheck_interrupts = irq_count; mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -176,7 +178,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + __set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -270,13 +272,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) return false; - /* Only start with the heavy weight fake irq timer if we have not + /* + * Only start with the heavy weight fake irq timer if we have not * seen any interrupts since enabling it the first time. If the * interrupts are still arriving, it means we made a mistake in our * engine->seqno_barrier(), a timing error that should be transient * and unlikely to reoccur. 
*/ - return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; + return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; } static void enable_fake_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c5ff203e42d6..f406d0ff4612 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -305,7 +305,6 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; - atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 #define ENGINE_IRQ_EXECLIST 1 @@ -340,6 +339,7 @@ struct intel_engine_cs { unsigned int hangcheck_interrupts; unsigned int irq_enabled; + unsigned int irq_count; bool irq_armed : 1; I915_SELFTEST_DECLARE(bool mock : 1);
Rather than have multiple locked instructions inside the notify_ring() irq handler, move them inside the spinlock and reduce their intrinsic locking. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- drivers/gpu/drm/i915/intel_breadcrumbs.c | 13 ++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-)