Message ID | 20170222114610.5819-11-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 22/02/2017 11:46, Chris Wilson wrote: > After the request is cancelled, we then need to remove it from the > global execution timeline and return it to the context timeline, the > inverse of submit_request(). > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem_request.c | 58 +++++++++++++++++++++- > drivers/gpu/drm/i915/i915_gem_request.h | 3 ++ > drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 ++++++- > drivers/gpu/drm/i915/intel_ringbuffer.h | 6 --- > drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 6 +++ > 5 files changed, 83 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index d18f450977e0..97116e492d01 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) > spin_unlock_irqrestore(&engine->timeline->lock, flags); > } > > +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) > +{ > + struct intel_engine_cs *engine = request->engine; > + struct intel_timeline *timeline; > + > + assert_spin_locked(&engine->timeline->lock); > + > + /* Only unwind in reverse order, required so that the per-context list > + * is kept in seqno/ring order. > + */ > + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); > + engine->timeline->seqno--; > + > + /* We may be recursing from the signal callback of another i915 fence */ Copy-paste of the comment of there will really be preemption triggered from the signal callback? 
> + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > + request->global_seqno = 0; > + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) > + intel_engine_cancel_signaling(request); > + spin_unlock(&request->lock); > + > + /* Transfer back from the global per-engine timeline to per-context */ > + timeline = request->timeline; > + GEM_BUG_ON(timeline == engine->timeline); > + > + spin_lock(&timeline->lock); > + list_move(&request->link, &timeline->requests); > + spin_unlock(&timeline->lock); > + > + /* We don't need to wake_up any waiters on request->execute, they > + * will get woken by any other event or us re-adding this request > + * to the engine timeline (__i915_gem_request_submit()). The waiters > + * should be quite adapt at finding that the request now has a new > + * global_seqno to the one they went to sleep on. > + */ > +} > + > +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) > +{ > + struct intel_engine_cs *engine = request->engine; > + unsigned long flags; > + > + /* Will be called from irq-context when using foreign fences. */ > + spin_lock_irqsave(&engine->timeline->lock, flags); > + > + __i915_gem_request_unsubmit(request); > + > + spin_unlock_irqrestore(&engine->timeline->lock, flags); > +} > + > static int __i915_sw_fence_call > submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) > { > @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, > if (flags & I915_WAIT_LOCKED) > add_wait_queue(errq, &reset); > > - intel_wait_init(&wait, i915_gem_request_global_seqno(req)); > + wait.tsk = current; > > +restart: > reset_wait_queue(&req->execute, &exec); > + wait.seqno = i915_gem_request_global_seqno(req); Not sure if it is worth dropping intel_wait_init, I presume to avoid assigning the task twice? It will still be the same task so just moving the intel_wait_init here would be clearer. 
> if (!wait.seqno) { > do { > set_current_state(state); > @@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, > /* Only spin if we know the GPU is processing this request */ > if (i915_spin_request(req, state, 2)) > break; > + > + if (i915_gem_request_global_seqno(req) != wait.seqno) { > + intel_engine_remove_wait(req->engine, &wait); > + goto restart; > + } > } > > intel_engine_remove_wait(req->engine, &wait); > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index b81f6709905c..5f73d8c0a38a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); > void __i915_gem_request_submit(struct drm_i915_gem_request *request); > void i915_gem_request_submit(struct drm_i915_gem_request *request); > > +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); > +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); > + > struct intel_rps_client; > #define NO_WAITBOOST ERR_PTR(-1) > #define IS_RPS_CLIENT(p) (!IS_ERR(p)) > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c > index 882e601ebb09..5bcad7872c08 100644 > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c > @@ -453,7 +453,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, > spin_unlock_irq(&b->lock); > } > > -static bool signal_complete(struct drm_i915_gem_request *request) > +static bool signal_valid(const struct drm_i915_gem_request *request) > +{ > + u32 seqno = READ_ONCE(request->global_seqno); > + > + return seqno == request->signaling.wait.seqno; > +} > + > +static bool signal_complete(const struct drm_i915_gem_request *request) > { > if (!request) > return false; > @@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) > * 
signalled that this wait is already completed. > */ > if (intel_wait_complete(&request->signaling.wait)) > - return true; > + return signal_valid(request); > > /* Carefully check if the request is complete, giving time for the > * seqno to be visible or if the GPU hung. > @@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg) > > i915_gem_request_put(request); > } else { > + DEFINE_WAIT(exec); > + > if (kthread_should_stop()) { > GEM_BUG_ON(request); > break; > } > > + if (request) > + add_wait_queue(&request->execute, &exec); > + > schedule(); > > + if (request) > + remove_wait_queue(&request->execute, &exec); > + Not directly related but made me think why we are using TASK_INTERRUPTIBLE in the signallers? Shouldn't it be TASK_UNINTERRUPTIBLE and io_schedule? Sounds a bit deja vu though, maybe we have talked about it before.. > if (kthread_should_park()) > kthread_parkme(); > } > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 45d2c2fa946e..97fde79167a6 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) > /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ > int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); > > -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) > -{ > - wait->tsk = current; > - wait->seqno = seqno; > -} > - > static inline bool intel_wait_complete(const struct intel_wait *wait) > { > return RB_EMPTY_NODE(&wait->node); > diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c > index 6426acc9fdca..62c020c7ea80 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c > @@ -28,6 +28,12 @@ > #include "mock_gem_device.h" > #include "mock_engine.h" > > 
+static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) > +{ > + wait->tsk = current; > + wait->seqno = seqno; > +} > + > static int check_rbtree(struct intel_engine_cs *engine, > const unsigned long *bitmap, > const struct intel_wait *waiters, > Regards, Tvrtko
On Wed, Feb 22, 2017 at 01:33:22PM +0000, Tvrtko Ursulin wrote: > > On 22/02/2017 11:46, Chris Wilson wrote: > >+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) > >+{ > >+ struct intel_engine_cs *engine = request->engine; > >+ struct intel_timeline *timeline; > >+ > >+ assert_spin_locked(&engine->timeline->lock); > >+ > >+ /* Only unwind in reverse order, required so that the per-context list > >+ * is kept in seqno/ring order. > >+ */ > >+ GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); > >+ engine->timeline->seqno--; > >+ > >+ /* We may be recursing from the signal callback of another i915 fence */ > > Copy-paste of the comment, or will there really be preemption > triggered from the signal callback? I believe it may be. Say an RCS request was waiting on a BCS request, and we decide to preempt, and can do so immediately. I think being prepared for the same recursion here is prudent. > > static int __i915_sw_fence_call > > submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) > > { > >@@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, > > if (flags & I915_WAIT_LOCKED) > > add_wait_queue(errq, &reset); > > > >- intel_wait_init(&wait, i915_gem_request_global_seqno(req)); > >+ wait.tsk = current; > > > >+restart: > > reset_wait_queue(&req->execute, &exec); > >+ wait.seqno = i915_gem_request_global_seqno(req); > > Not sure if it is worth dropping intel_wait_init, I presume to avoid > assigning the task twice? It will still be the same task so just > moving the intel_wait_init here would be clearer. I was thinking the opposite, since we are looking at wait.seqno directly elsewhere, so wanted that to be clear. 
And current is in a special register, so why pay the cost to reload it onto stack :) > >@@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg) > > > > i915_gem_request_put(request); > > } else { > >+ DEFINE_WAIT(exec); > >+ > > if (kthread_should_stop()) { > > GEM_BUG_ON(request); > > break; > > } > > > >+ if (request) > >+ add_wait_queue(&request->execute, &exec); > >+ > > schedule(); > > > >+ if (request) > >+ remove_wait_queue(&request->execute, &exec); > >+ > > Not directly related but made me think why we are using > TASK_INTERRUPTIBLE in the signallers? Shouldn't it be > TASK_UNINTERRUPTIBLE and io_schedule? Sounds a bit deja vu though, > maybe we have talked about it before.. It doesn't make any difference to the signalers as they are kthreads and shouldn't be interrupted - but it does make a difference to the reported load as TASK_UNINTERRUPTIBLE contributes to system load. -Chris
On 22/02/2017 13:40, Chris Wilson wrote: > On Wed, Feb 22, 2017 at 01:33:22PM +0000, Tvrtko Ursulin wrote: >> >> On 22/02/2017 11:46, Chris Wilson wrote: >>> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) >>> +{ >>> + struct intel_engine_cs *engine = request->engine; >>> + struct intel_timeline *timeline; >>> + >>> + assert_spin_locked(&engine->timeline->lock); >>> + >>> + /* Only unwind in reverse order, required so that the per-context list >>> + * is kept in seqno/ring order. >>> + */ >>> + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); >>> + engine->timeline->seqno--; >>> + >>> + /* We may be recursing from the signal callback of another i915 fence */ >> >> Copy-paste of the comment, or will there really be preemption >> triggered from the signal callback? > > I believe it may be. Say an RCS request was waiting on a BCS request, > and we decide to preempt, and can do so immediately. I think being > prepared for the same recursion here is prudent. Yeah OK, just wasn't sure at which level we will handle preemption. >>> static int __i915_sw_fence_call >>> submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) >>> { >>> @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, >>> if (flags & I915_WAIT_LOCKED) >>> add_wait_queue(errq, &reset); >>> >>> - intel_wait_init(&wait, i915_gem_request_global_seqno(req)); >>> + wait.tsk = current; >>> >>> +restart: >>> reset_wait_queue(&req->execute, &exec); >>> + wait.seqno = i915_gem_request_global_seqno(req); >> >> Not sure if it is worth dropping intel_wait_init, I presume to avoid >> assigning the task twice? It will still be the same task so just >> moving the intel_wait_init here would be clearer. > > I was thinking the opposite, since we are looking at wait.seqno directly > elsewhere, so wanted that to be clear. 
And current is in a special > register, so why pay the cost to reload it onto stack :) I can see that but intel_wait_init was so nice as a marker when reading the code. Maybe leave it and add intel_wait_update_seqno? Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d18f450977e0..97116e492d01 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + + assert_spin_locked(&engine->timeline->lock); + + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. 
*/ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait, i915_gem_request_global_seqno(req)); + wait.tsk = current; +restart: reset_wait_queue(&req->execute, &exec); + wait.seqno = i915_gem_request_global_seqno(req); if (!wait.seqno) { do { set_current_state(state); @@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (i915_gem_request_global_seqno(req) != wait.seqno) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index b81f6709905c..5f73d8c0a38a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 882e601ebb09..5bcad7872c08 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -453,7 +453,14 @@ void 
intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + u32 seqno = READ_ONCE(request->global_seqno); + + return seqno == request->signaling.wait.seqno; +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg) i915_gem_request_put(request); } else { + DEFINE_WAIT(exec); + if (kthread_should_stop()) { GEM_BUG_ON(request); break; } + if (request) + add_wait_queue(&request->execute, &exec); + schedule(); + if (request) + remove_wait_queue(&request->execute, &exec); + if (kthread_should_park()) kthread_parkme(); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 45d2c2fa946e..97fde79167a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - static inline bool intel_wait_complete(const struct intel_wait *wait) { return RB_EMPTY_NODE(&wait->node); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6426acc9fdca..62c020c7ea80 
100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -28,6 +28,12 @@ #include "mock_gem_device.h" #include "mock_engine.h" +static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +{ + wait->tsk = current; + wait->seqno = seqno; +} + static int check_rbtree(struct intel_engine_cs *engine, const unsigned long *bitmap, const struct intel_wait *waiters,
After the request is cancelled, we then need to remove it from the global execution timeline and return it to the context timeline, the inverse of submit_request(). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem_request.c | 58 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_request.h | 3 ++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 ++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 --- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 6 +++ 5 files changed, 83 insertions(+), 9 deletions(-)