Message ID | 20190408091728.20207-11-chris@chris-wilson.co.uk (mailing list archive)
---|---
State | New, archived
Series | [01/29] drm/i915: Mark up ips for RCU protection
On 08/04/2019 10:17, Chris Wilson wrote: > Start acquiring the logical intel_context and using that as our primary > means for request allocation. This is the initial step to allow us to > avoid requiring struct_mutex for request allocation along the > perma-pinned kernel context, but it also provides a foundation for > breaking up the complex request allocation to handle different scenarios > inside execbuf. > > For the purpose of emitting a request from inside retirement (see the > next patch for engine power management), we also need to lift control > over the timeline mutex to the caller. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_context.h | 12 + > drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 - > drivers/gpu/drm/i915/i915_gem_context.c | 4 +- > drivers/gpu/drm/i915/i915_perf.c | 2 +- > drivers/gpu/drm/i915/i915_request.c | 245 ++++++++++-------- > drivers/gpu/drm/i915/i915_request.h | 7 + > drivers/gpu/drm/i915/intel_overlay.c | 5 +- > drivers/gpu/drm/i915/selftests/i915_active.c | 2 +- > .../drm/i915/selftests/i915_gem_coherency.c | 2 +- > .../gpu/drm/i915/selftests/i915_gem_object.c | 2 +- > drivers/gpu/drm/i915/selftests/i915_request.c | 9 +- > .../gpu/drm/i915/selftests/i915_timeline.c | 4 +- > 13 files changed, 175 insertions(+), 124 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h > index 95b1fdc5826a..9aeef88176b9 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.h > +++ b/drivers/gpu/drm/i915/gt/intel_context.h > @@ -104,4 +104,16 @@ static inline void intel_context_put(struct intel_context *ce) > kref_put(&ce->ref, ce->ops->destroy); > } > > +static inline void intel_context_timeline_lock(struct intel_context *ce) > + __acquires(&ce->ring->timeline->mutex) > +{ > + mutex_lock(&ce->ring->timeline->mutex); > +} > + > +static inline void intel_context_timeline_unlock(struct intel_context *ce) > + __releases(&ce->ring->timeline->mutex) > +{ > + mutex_unlock(&ce->ring->timeline->mutex); > +} > + > #endif /* __INTEL_CONTEXT_H__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index dce49b8b1118..a5501b136f0c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -770,7 +770,7 @@ static void restart_work(struct work_struct *work) > if (!intel_engine_is_idle(engine)) > continue; > > - rq = i915_request_alloc(engine, i915->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (!IS_ERR(rq)) > i915_request_add(rq); > } > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > index 5788e5ab818f..63bf092d737b 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > @@ -1778,7 +1778,6 @@ static int switch_context(struct i915_request *rq) > u32 hw_flags = 0; > int ret, i; > > - lockdep_assert_held(&rq->i915->drm.struct_mutex); > GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); > > if (ppgtt) { > @@ -1908,8 +1907,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) > struct i915_request *target; > long timeout; > > - lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); > - > if (intel_ring_update_space(ring) >= bytes) > return 0; > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > 
index 170500d49049..de3404479bc3 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -942,7 +942,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, > struct intel_ring *ring; > struct i915_request *rq; > > - rq = i915_request_alloc(engine, i915->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (IS_ERR(rq)) > return PTR_ERR(rq); > > @@ -1188,7 +1188,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) > /* Submitting requests etc needs the hw awake. */ > wakeref = intel_runtime_pm_get(i915); > > - rq = i915_request_alloc(ce->engine, i915->kernel_context); > + rq = i915_request_create(ce->engine->kernel_context); > if (IS_ERR(rq)) { > ret = PTR_ERR(rq); > goto out_put; > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index a87f790335c1..328a740e72cb 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1762,7 +1762,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > * Apply the configuration by doing one context restore of the edited > * context image. > */ > - rq = i915_request_alloc(engine, dev_priv->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (IS_ERR(rq)) > return PTR_ERR(rq); > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index e72e4087bbfe..ca4c200ee7ec 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -550,7 +550,7 @@ static void ring_retire_requests(struct intel_ring *ring) > } > > static noinline struct i915_request * > -i915_request_alloc_slow(struct intel_context *ce) > +request_alloc_slow(struct intel_context *ce, gfp_t gfp) > { > struct intel_ring *ring = ce->ring; > struct i915_request *rq; > @@ -558,6 +558,9 @@ i915_request_alloc_slow(struct intel_context *ce) > if (list_empty(&ring->request_list)) > goto out; > > + if (!gfpflags_allow_blocking(gfp)) > + goto out; > + > /* Ratelimit ourselves to prevent oom from malicious clients */ > rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); > cond_synchronize_rcu(rq->rcustate); > @@ -566,62 +569,21 @@ i915_request_alloc_slow(struct intel_context *ce) > ring_retire_requests(ring); > > out: > - return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); > + return kmem_cache_alloc(global.slab_requests, gfp); > } > > -/** > - * i915_request_alloc - allocate a request structure > - * > - * @engine: engine that we wish to issue the request on. > - * @ctx: context that the request will be associated with. > - * > - * Returns a pointer to the allocated request if successful, > - * or an error code if not. > - */ > struct i915_request * > -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > +__i915_request_create(struct intel_context *ce, gfp_t gfp) > { > - struct drm_i915_private *i915 = engine->i915; > - struct intel_context *ce; > - struct i915_timeline *tl; > + struct i915_timeline *tl = ce->ring->timeline; > struct i915_request *rq; > u32 seqno; > int ret; > > - lockdep_assert_held(&i915->drm.struct_mutex); > - > - /* > - * Preempt contexts are reserved for exclusive use to inject a > - * preemption context switch. They are never to be used for any trivial > - * request! > - */ > - GEM_BUG_ON(ctx == i915->preempt_context); > - > - /* > - * ABI: Before userspace accesses the GPU (e.g. 
execbuffer), report > - * EIO if the GPU is already wedged. > - */ > - ret = i915_terminally_wedged(i915); > - if (ret) > - return ERR_PTR(ret); > - > - /* > - * Pinning the contexts may generate requests in order to acquire > - * GGTT space, so do this first before we reserve a seqno for > - * ourselves. > - */ > - ce = intel_context_pin(ctx, engine); > - if (IS_ERR(ce)) > - return ERR_CAST(ce); > - > - mutex_lock(&ce->ring->timeline->mutex); > - intel_context_enter(ce); > + might_sleep_if(gfpflags_allow_blocking(gfp)); > > - /* Move our oldest request to the slab-cache (if not in use!) */ > - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); > - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && > - i915_request_completed(rq)) > - i915_request_retire(rq); > + /* Check that the caller provided an already pinned context */ > + __intel_context_pin(ce); > > /* > * Beware: Dragons be flying overhead. > @@ -653,30 +615,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > * Do not use kmem_cache_zalloc() here! > */ > rq = kmem_cache_alloc(global.slab_requests, > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); > + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); > if (unlikely(!rq)) { > - rq = i915_request_alloc_slow(ce); > + rq = request_alloc_slow(ce, gfp); > if (!rq) { > ret = -ENOMEM; > goto err_unreserve; > } > } > > - INIT_LIST_HEAD(&rq->active_list); > - INIT_LIST_HEAD(&rq->execute_cb); > - > - tl = ce->ring->timeline; > ret = i915_timeline_get_seqno(tl, rq, &seqno); > if (ret) > goto err_free; > > - rq->i915 = i915; > - rq->engine = engine; > - rq->gem_context = ctx; > + rq->i915 = ce->engine->i915; > rq->hw_context = ce; > + rq->gem_context = ce->gem_context; > + rq->engine = ce->engine; > rq->ring = ce->ring; > rq->timeline = tl; > - GEM_BUG_ON(rq->timeline == &engine->timeline); > + GEM_BUG_ON(rq->timeline == &ce->engine->timeline); > rq->hwsp_seqno = tl->hwsp_seqno; > rq->hwsp_cacheline = tl->hwsp_cacheline; > rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ > @@ -696,6 +654,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > rq->capture_list = NULL; > rq->waitboost = false; > > + INIT_LIST_HEAD(&rq->active_list); > + INIT_LIST_HEAD(&rq->execute_cb); > + > /* > * Reserve space in the ring buffer for all the commands required to > * eventually emit this request. This is to guarantee that the > @@ -708,7 +669,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > * around inside i915_request_add() there is sufficient space at > * the beginning of the ring as well. 
> */ > - rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); > + rq->reserved_space = > + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); > > /* > * Record the position of the start of the request so that > @@ -718,20 +680,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > */ > rq->head = rq->ring->emit; > > - ret = engine->request_alloc(rq); > + ret = rq->engine->request_alloc(rq); > if (ret) > goto err_unwind; > > - /* Keep a second pin for the dual retirement along engine and ring */ > - __intel_context_pin(ce); > - > rq->infix = rq->ring->emit; /* end of header; start of user payload */ > > - /* Check that we didn't interrupt ourselves with a new request */ > - lockdep_assert_held(&rq->timeline->mutex); > - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); > - rq->cookie = lockdep_pin_lock(&rq->timeline->mutex); > + /* Keep a second pin for the dual retirement along engine and ring */ > + __intel_context_pin(ce); > > + intel_context_mark_active(ce); > return rq; > > err_unwind: > @@ -745,12 +703,86 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > err_free: > kmem_cache_free(global.slab_requests, rq); > err_unreserve: > - intel_context_exit(ce); > - mutex_unlock(&ce->ring->timeline->mutex); > intel_context_unpin(ce); > return ERR_PTR(ret); > } > > +struct i915_request * > +i915_request_create(struct intel_context *ce) > +{ > + struct i915_request *rq; > + > + intel_context_timeline_lock(ce); > + > + /* Move our oldest request to the slab-cache (if not in use!) */ > + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); > + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && > + i915_request_completed(rq)) > + i915_request_retire(rq); > + > + intel_context_enter(ce); > + rq = __i915_request_create(ce, GFP_KERNEL); > + intel_context_exit(ce); I think a comment here saying __i915_request_create has a reference to the context now so we can drop the local one would be helpful. > + if (IS_ERR(rq)) > + goto err_unlock; > + > + /* Check that we do not interrupt ourselves with a new request */ > + rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); > + > + return rq; > + > +err_unlock: > + intel_context_timeline_unlock(ce); > + return rq; > +} > + > +/** > + * i915_request_alloc - allocate a request structure > + * > + * @engine: engine that we wish to issue the request on. > + * @ctx: context that the request will be associated with. > + * > + * Returns a pointer to the allocated request if successful, > + * or an error code if not. > + */ > +struct i915_request * > +i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > +{ > + struct drm_i915_private *i915 = engine->i915; > + struct intel_context *ce; > + struct i915_request *rq; > + int ret; > + > + /* > + * Preempt contexts are reserved for exclusive use to inject a > + * preemption context switch. They are never to be used for any trivial > + * request! > + */ > + GEM_BUG_ON(ctx == i915->preempt_context); > + > + /* > + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report > + * EIO if the GPU is already wedged. > + */ > + ret = i915_terminally_wedged(i915); > + if (ret) > + return ERR_PTR(ret); > + > + /* > + * Pinning the contexts may generate requests in order to acquire > + * GGTT space, so do this first before we reserve a seqno for > + * ourselves. 
> + */ > + ce = intel_context_pin(ctx, engine); > + if (IS_ERR(ce)) > + return ERR_CAST(ce); > + > + rq = i915_request_create(ce); > + intel_context_unpin(ce); > + > + return rq; > +} > + > static int > emit_semaphore_wait(struct i915_request *to, > struct i915_request *from, > @@ -1005,8 +1037,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) > * precludes optimising to use semaphores serialisation of a single > * timeline across engines. > */ > - prev = i915_active_request_raw(&timeline->last_request, > - &rq->i915->drm.struct_mutex); > + prev = rcu_dereference_protected(timeline->last_request.request, 1); > if (prev && !i915_request_completed(prev)) { > if (is_power_of_2(prev->engine->mask | rq->engine->mask)) > i915_sw_fence_await_sw_fence(&rq->submit, > @@ -1027,6 +1058,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) > list_add_tail(&rq->link, &timeline->requests); > spin_unlock_irq(&timeline->lock); > > + /* > + * Make sure that no request gazumped us - if it was allocated after > + * our i915_request_alloc() and called __i915_request_add() before > + * us, the timeline will hold its seqno which is later than ours. > + */ > GEM_BUG_ON(timeline->seqno != rq->fence.seqno); > __i915_active_request_set(&timeline->last_request, rq); > > @@ -1038,36 +1074,23 @@ __i915_request_add_to_timeline(struct i915_request *rq) > * request is not being tracked for completion but the work itself is > * going to happen on the hardware. This would be a Bad Thing(tm). > */ > -void i915_request_add(struct i915_request *request) > +struct i915_request *__i915_request_commit(struct i915_request *rq) > { > - struct intel_engine_cs *engine = request->engine; > - struct i915_timeline *timeline = request->timeline; > - struct intel_ring *ring = request->ring; > + struct intel_engine_cs *engine = rq->engine; > + struct intel_ring *ring = rq->ring; > struct i915_request *prev; > u32 *cs; > > GEM_TRACE("%s fence %llx:%lld\n", > - engine->name, request->fence.context, request->fence.seqno); > - > - lockdep_assert_held(&request->timeline->mutex); > - lockdep_unpin_lock(&request->timeline->mutex, request->cookie); > - > - trace_i915_request_add(request); > - > - /* > - * Make sure that no request gazumped us - if it was allocated after > - * our i915_request_alloc() and called __i915_request_add() before > - * us, the timeline will hold its seqno which is later than ours. > - */ > - GEM_BUG_ON(timeline->seqno != request->fence.seqno); > + engine->name, rq->fence.context, rq->fence.seqno); > > /* > * To ensure that this call will not fail, space for its emissions > * should already have been reserved in the ring buffer. Let the ring > * know that it is time to use that space up. > */ > - GEM_BUG_ON(request->reserved_space > request->ring->space); > - request->reserved_space = 0; > + GEM_BUG_ON(rq->reserved_space > ring->space); > + rq->reserved_space = 0; > > /* > * Record the position of the start of the breadcrumb so that > @@ -1075,17 +1098,17 @@ void i915_request_add(struct i915_request *request) > * GPU processing the request, we never over-estimate the > * position of the ring's HEAD. 
> */ > - cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); > + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); > GEM_BUG_ON(IS_ERR(cs)); > - request->postfix = intel_ring_offset(request, cs); > + rq->postfix = intel_ring_offset(rq, cs); > > - prev = __i915_request_add_to_timeline(request); > + prev = __i915_request_add_to_timeline(rq); > > - list_add_tail(&request->ring_link, &ring->request_list); > - if (list_is_first(&request->ring_link, &ring->request_list)) > - list_add(&ring->active_link, &request->i915->gt.active_rings); > - request->i915->gt.active_engines |= request->engine->mask; > - request->emitted_jiffies = jiffies; > + list_add_tail(&rq->ring_link, &ring->request_list); > + if (list_is_first(&rq->ring_link, &ring->request_list)) > + list_add(&ring->active_link, &rq->i915->gt.active_rings); > + rq->i915->gt.active_engines |= rq->engine->mask; > + rq->emitted_jiffies = jiffies; > > /* > * Let the backend know a new request has arrived that may need > @@ -1101,7 +1124,7 @@ void i915_request_add(struct i915_request *request) > local_bh_disable(); > rcu_read_lock(); /* RCU serialisation for set-wedged protection */ > if (engine->schedule) { > - struct i915_sched_attr attr = request->gem_context->sched; > + struct i915_sched_attr attr = rq->gem_context->sched; > > /* > * Boost actual workloads past semaphores! > @@ -1115,7 +1138,7 @@ void i915_request_add(struct i915_request *request) > * far in the distance past over useful work, we keep a history > * of any semaphore use along our dependency chain. > */ > - if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) > + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) > attr.priority |= I915_PRIORITY_NOSEMAPHORE; > > /* > @@ -1124,15 +1147,29 @@ void i915_request_add(struct i915_request *request) > * Allow interactive/synchronous clients to jump ahead of > * the bulk clients. 
(FQ_CODEL) > */ > - if (list_empty(&request->sched.signalers_list)) > + if (list_empty(&rq->sched.signalers_list)) > attr.priority |= I915_PRIORITY_NEWCLIENT; > > - engine->schedule(request, &attr); > + engine->schedule(rq, &attr); > } > rcu_read_unlock(); > - i915_sw_fence_commit(&request->submit); > + i915_sw_fence_commit(&rq->submit); > local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ > > + return prev; > +} > + > +void i915_request_add(struct i915_request *rq) > +{ > + struct i915_request *prev; > + > + lockdep_assert_held(&rq->timeline->mutex); > + lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); > + > + trace_i915_request_add(rq); > + > + prev = __i915_request_commit(rq); > + > /* > * In typical scenarios, we do not expect the previous request on > * the timeline to be still tracked by timeline->last_request if it > @@ -1153,7 +1190,7 @@ void i915_request_add(struct i915_request *request) > if (prev && i915_request_completed(prev)) > i915_request_retire_upto(prev); > > - mutex_unlock(&request->timeline->mutex); > + mutex_unlock(&rq->timeline->mutex); > } > > static unsigned long local_clock_us(unsigned int *cpu) > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 875be6f71412..9a50072171f8 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -238,6 +238,13 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) > return fence->ops == &i915_fence_ops; > } > > +struct i915_request * __must_check > +__i915_request_create(struct intel_context *ce, gfp_t gfp); > +struct i915_request * __must_check > +i915_request_create(struct intel_context *ce); > + > +struct i915_request *__i915_request_commit(struct i915_request *request); > + > struct i915_request * __must_check > i915_request_alloc(struct intel_engine_cs *engine, > struct i915_gem_context *ctx); > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index a882b8d42bd9..fea8c5869da9 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -235,10 +235,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, > > static struct i915_request *alloc_request(struct intel_overlay *overlay) > { > - struct drm_i915_private *dev_priv = overlay->i915; > - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; > + struct intel_engine_cs *engine = overlay->i915->engine[RCS0]; > > - return i915_request_alloc(engine, dev_priv->kernel_context); > + return i915_request_create(engine->kernel_context); > } > > /* overlay needs to be disable in OCMD reg */ > diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c > index 27d8f853111b..eee838dc0634 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_active.c > +++ b/drivers/gpu/drm/i915/selftests/i915_active.c > @@ -46,7 +46,7 @@ static int __live_active_setup(struct drm_i915_private *i915, > for_each_engine(engine, i915, id) { > struct i915_request *rq; > > - rq = i915_request_alloc(engine, i915->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > break; > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > index e43630b40fce..046a38743152 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c > @@ -202,7 +202,7 @@ static 
int gpu_set(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > return PTR_ERR(vma); > > - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); > + rq = i915_request_create(i915->engine[RCS0]->kernel_context); > if (IS_ERR(rq)) { > i915_vma_unpin(vma); > return PTR_ERR(rq); > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c > index 12fc53c694a6..12203d665a4e 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c > @@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) > if (err) > return err; > > - rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context); > + rq = i915_request_create(i915->engine[RCS0]->kernel_context); > if (IS_ERR(rq)) { > i915_vma_unpin(vma); > return PTR_ERR(rq); > diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c > index e6ffe2240126..098d7b3aa131 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_request.c > +++ b/drivers/gpu/drm/i915/selftests/i915_request.c > @@ -551,8 +551,7 @@ static int live_nop_request(void *arg) > times[1] = ktime_get_raw(); > > for (n = 0; n < prime; n++) { > - request = i915_request_alloc(engine, > - i915->kernel_context); > + request = i915_request_create(engine->kernel_context); > if (IS_ERR(request)) { > err = PTR_ERR(request); > goto out_unlock; > @@ -649,7 +648,7 @@ empty_request(struct intel_engine_cs *engine, > struct i915_request *request; > int err; > > - request = i915_request_alloc(engine, engine->i915->kernel_context); > + request = i915_request_create(engine->kernel_context); > if (IS_ERR(request)) > return request; > > @@ -853,7 +852,7 @@ static int live_all_engines(void *arg) > } > > for_each_engine(engine, i915, id) { > - request[id] = i915_request_alloc(engine, i915->kernel_context); > + request[id] = i915_request_create(engine->kernel_context); > if (IS_ERR(request[id])) { > err = PTR_ERR(request[id]); > pr_err("%s: Request allocation failed with err=%d\n", > @@ -962,7 +961,7 @@ static int live_sequential_engines(void *arg) > goto out_unlock; > } > > - request[id] = i915_request_alloc(engine, i915->kernel_context); > + request[id] = i915_request_create(engine->kernel_context); > if (IS_ERR(request[id])) { > err = PTR_ERR(request[id]); > pr_err("%s: Request allocation failed for %s with err=%d\n", > diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c > index 8e7bcaa1eb66..227479349c61 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c > +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c > @@ -454,7 +454,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) > goto out; > } > > - rq = i915_request_alloc(engine, engine->i915->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (IS_ERR(rq)) > goto out_unpin; > > @@ -678,7 +678,7 @@ static int live_hwsp_wrap(void *arg) > if (!intel_engine_can_store_dword(engine)) > continue; > > - rq = i915_request_alloc(engine, i915->kernel_context); > + rq = i915_request_create(engine->kernel_context); > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > goto out; > With the comment added: Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
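For readers following the conversion, here is a minimal illustrative sketch (not part of the patch; the emit_kernel_request() wrapper is hypothetical, only the i915_request_create()/i915_request_add() calls come from the hunks above) of the pattern the converted call sites now use when emitting a request on the perma-pinned kernel context:

```c
/*
 * Illustrative sketch only: the request-emission pattern used by the
 * converted callers above (intel_reset.c, i915_perf.c, the selftests).
 */
static int emit_kernel_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	/*
	 * engine->kernel_context is already pinned, so there is no
	 * intel_context_pin()/unpin() around the allocation;
	 * i915_request_create() takes the ring timeline mutex itself.
	 */
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... emit commands into the request's ring here ... */

	/* Commits the request and drops the timeline mutex. */
	i915_request_add(rq);

	return 0;
}
```

Callers that need to manage the timeline themselves, such as emitting a request from inside retirement in the next patch, can instead take intel_context_timeline_lock(), build the request with __i915_request_create() on an already pinned context, and finish it with __i915_request_commit() before unlocking.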