Message ID | 20180627201304.15817-1-chris@chris-wilson.co.uk (mailing list archive)
---|---
State | New, archived
Chris Wilson <chris@chris-wilson.co.uk> writes:

> By taking advantage of the RCU protection of the task struct, we can find
> the appropriate signaler under the spinlock and then release the spinlock
> before waking the task and signaling the fence.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 46aaef5c1851..56a080bc4498 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1145,21 +1145,23 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	const u32 seqno = intel_engine_get_seqno(engine);
 	struct i915_request *rq = NULL;
+	struct task_struct *tsk = NULL;
 	struct intel_wait *wait;
 
-	if (!engine->breadcrumbs.irq_armed)
+	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
 	atomic_inc(&engine->irq_count);
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
+
+	rcu_read_lock();
 
 	spin_lock(&engine->breadcrumbs.irq_lock);
 	wait = engine->breadcrumbs.irq_wait;
 	if (wait) {
-		bool wakeup = engine->irq_seqno_barrier;
-
-		/* We use a callback from the dma-fence to submit
+		/*
+		 * We use a callback from the dma-fence to submit
 		 * requests after waiting on our own requests. To
 		 * ensure minimum delay in queuing the next request to
 		 * hardware, signal the fence now rather than wait for
@@ -1170,19 +1172,22 @@ static void notify_ring(struct intel_engine_cs *engine)
 		 * and to handle coalescing of multiple seqno updates
 		 * and many waiters.
 		 */
-		if (i915_seqno_passed(intel_engine_get_seqno(engine),
-				      wait->seqno)) {
+		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct i915_request *waiter = wait->request;
 
-			wakeup = true;
 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_request_get(waiter);
-		}
 
-		if (wakeup)
-			wake_up_process(wait->tsk);
+			tsk = wait->tsk;
+		} else {
+			if (engine->irq_seqno_barrier) {
+				set_bit(ENGINE_IRQ_BREADCRUMB,
+					&engine->irq_posted);
+				tsk = wait->tsk;
+			}
+		}
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
@@ -1195,6 +1200,11 @@ static void notify_ring(struct intel_engine_cs *engine)
 		i915_request_put(rq);
 	}
 
+	if (tsk && tsk->state & TASK_NORMAL)
+		wake_up_process(tsk);
+
+	rcu_read_unlock();
+
 	trace_intel_engine_notify(engine, wait);
 }
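For readers who don't live in drivers/gpu/drm/i915, the shape of the change is easier
to see in isolation. The sketch below is illustrative only: the waiter structure, the
lock, and notify() are invented for this note, and the plain ">=" ignores the seqno
wraparound that the driver's i915_seqno_passed() handles. The point it demonstrates is
the same as the patch's: task structs are freed via RCU, so rcu_read_lock() keeps a
task_struct snapshotted under the spinlock valid after the lock is dropped, letting
wake_up_process() run outside the critical section.

	/* Hypothetical sketch of the pattern, not i915 code. */
	#include <linux/spinlock.h>
	#include <linux/rcupdate.h>
	#include <linux/sched.h>

	struct waiter {
		struct task_struct *tsk;	/* task sleeping on this event */
		u32 seqno;			/* value it is waiting for */
	};

	static DEFINE_SPINLOCK(wait_lock);
	static struct waiter *first_waiter;	/* protected by wait_lock */

	static void notify(u32 seqno)
	{
		struct task_struct *tsk = NULL;
		struct waiter *w;

		rcu_read_lock();	/* pins any task_struct looked up below */

		spin_lock(&wait_lock);
		w = first_waiter;
		if (w && seqno >= w->seqno)
			tsk = w->tsk;	/* snapshot the task under the lock... */
		spin_unlock(&wait_lock);

		/* ...but do the comparatively slow wakeup outside of it. */
		if (tsk && tsk->state & TASK_NORMAL)
			wake_up_process(tsk);

		rcu_read_unlock();
	}

The win is that the wakeup, which takes scheduler locks of its own, no longer extends
the hold time of breadcrumbs.irq_lock in the interrupt path.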
By taking advantage of the RCU protection of the task struct, we can find
the appropriate signaler under the spinlock and then release the spinlock
before waking the task and signaling the fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)
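An aside on the final hunk: wake_up_process() only wakes tasks in TASK_NORMAL
(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) anyway, so the "tsk->state & TASK_NORMAL"
test is a pre-filter that skips the call, and the task locking inside it, when the
waiter is already running. A minimal sketch of the same guard, with a hypothetical
helper name:

	#include <linux/sched.h>

	/* Hypothetical helper: wake @t only if it is actually sleeping. */
	static void wake_if_sleeping(struct task_struct *t)
	{
		/*
		 * TASK_RUNNING is 0, so the bit test is false for a running
		 * task, while TASK_NORMAL covers both interruptible and
		 * uninterruptible sleeps. wake_up_process() makes the same
		 * check internally, but only after taking the task's pi_lock.
		 */
		if (t->state & TASK_NORMAL)
			wake_up_process(t);
	}

The unlocked read of t->state is racy but benign here, as in the patch: a missed
wakeup is not possible for a waiter that re-checks its condition before sleeping.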