
[11/29] drm/i915: Pass intel_context to i915_request_create()

Message ID 20190408091728.20207-11-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/29] drm/i915: Mark up ips for RCU protection

Commit Message

Chris Wilson April 8, 2019, 9:17 a.m. UTC
Start acquiring the logical intel_context and using that as our primary
means for request allocation. This is the initial step towards allowing
us to avoid requiring struct_mutex for request allocation with the
perma-pinned kernel context, and it also provides a foundation for
breaking up the complex request allocation to handle different scenarios
inside execbuf.
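
For the simple kernel-context call sites converted below, the change is
mechanical; roughly (a sketch only, the helper name is made up for
illustration):

  static int emit_kernel_request(struct intel_engine_cs *engine)
  {
          struct i915_request *rq;

          /*
           * Previously i915_request_alloc(engine, i915->kernel_context),
           * which had to run under struct_mutex; the pinned
           * engine->kernel_context now identifies both the engine and the
           * kernel GEM context for us.
           */
          rq = i915_request_create(engine->kernel_context);
          if (IS_ERR(rq))
                  return PTR_ERR(rq);

          i915_request_add(rq);
          return 0;
  }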

For the purpose of emitting a request from inside retirement (see the
next patch for engine power management), we also need to lift control
over the timeline mutex to the caller.
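
Concretely, i915_request_create() takes the ring timeline mutex itself
(and i915_request_add() releases it), while __i915_request_create() is
the building block for callers that manage the lock explicitly; the real
user arrives with engine-pm in the next patch. A rough sketch of such a
caller, using only the helpers added here (the gfp choice is merely
illustrative, and intel_context_enter/exit plus wedged checks are
elided):

  intel_context_timeline_lock(ce);

  rq = __i915_request_create(ce, GFP_NOWAIT);
  if (IS_ERR(rq)) {
          intel_context_timeline_unlock(ce);
          return PTR_ERR(rq);
  }

  /* i915_request_add() expects the caller to have pinned the lock */
  rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);

  i915_request_add(rq); /* commits the request, drops the timeline mutex */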

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h       |  12 +
 drivers/gpu/drm/i915/gt/intel_reset.c         |   2 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   3 -
 drivers/gpu/drm/i915/i915_gem_context.c       |   4 +-
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/i915_request.c           | 245 ++++++++++--------
 drivers/gpu/drm/i915/i915_request.h           |   7 +
 drivers/gpu/drm/i915/intel_overlay.c          |   5 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |   2 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   2 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   9 +-
 .../gpu/drm/i915/selftests/i915_timeline.c    |   4 +-
 13 files changed, 175 insertions(+), 124 deletions(-)

Comments

Tvrtko Ursulin April 10, 2019, 10:38 a.m. UTC | #1
On 08/04/2019 10:17, Chris Wilson wrote:
> Start acquiring the logical intel_context and using that as our primary
> means for request allocation. This is the initial step to allow us to
> avoid requiring struct_mutex for request allocation along the
> perma-pinned kernel context, but it also provides a foundation for
> breaking up the complex request allocation to handle different scenarios
> inside execbuf.
> 
> For the purpose of emitting a request from inside retirement (see the
> next patch for engine power management), we also need to lift control
> over the timeline mutex to the caller.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

[snip]

> +struct i915_request *
> +i915_request_create(struct intel_context *ce)
> +{
> +	struct i915_request *rq;
> +
> +	intel_context_timeline_lock(ce);
> +
> +	/* Move our oldest request to the slab-cache (if not in use!) */
> +	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
> +	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
> +	    i915_request_completed(rq))
> +		i915_request_retire(rq);
> +
> +	intel_context_enter(ce);
> +	rq = __i915_request_create(ce, GFP_KERNEL);
> +	intel_context_exit(ce);

I think a comment here saying that __i915_request_create() now holds its
own reference on the context, so we can drop the local one, would be
helpful.
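
Something along these lines would do, exact wording up to you:

  	intel_context_enter(ce);
  	rq = __i915_request_create(ce, GFP_KERNEL);
  	intel_context_exit(ce); /* active reference transferred to the request */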

> +	if (IS_ERR(rq))
> +		goto err_unlock;
> +
> +	/* Check that we do not interrupt ourselves with a new request */
> +	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
> +
> +	return rq;
> +
> +err_unlock:
> +	intel_context_timeline_unlock(ce);
> +	return rq;
> +}

[snip]

With the comment added:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 95b1fdc5826a..9aeef88176b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -104,4 +104,16 @@  static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
+static inline void intel_context_timeline_lock(struct intel_context *ce)
+	__acquires(&ce->ring->timeline->mutex)
+{
+	mutex_lock(&ce->ring->timeline->mutex);
+}
+
+static inline void intel_context_timeline_unlock(struct intel_context *ce)
+	__releases(&ce->ring->timeline->mutex)
+{
+	mutex_unlock(&ce->ring->timeline->mutex);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index dce49b8b1118..a5501b136f0c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -770,7 +770,7 @@  static void restart_work(struct work_struct *work)
 		if (!intel_engine_is_idle(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (!IS_ERR(rq))
 			i915_request_add(rq);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 5788e5ab818f..63bf092d737b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1778,7 +1778,6 @@  static int switch_context(struct i915_request *rq)
 	u32 hw_flags = 0;
 	int ret, i;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (ppgtt) {
@@ -1908,8 +1907,6 @@  static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	struct i915_request *target;
 	long timeout;
 
-	lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
-
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 170500d49049..de3404479bc3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -942,7 +942,7 @@  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
@@ -1188,7 +1188,7 @@  gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	/* Submitting requests etc needs the hw awake. */
 	wakeref = intel_runtime_pm_get(i915);
 
-	rq = i915_request_alloc(ce->engine, i915->kernel_context);
+	rq = i915_request_create(ce->engine->kernel_context);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto out_put;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a87f790335c1..328a740e72cb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1762,7 +1762,7 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	 * Apply the configuration by doing one context restore of the edited
 	 * context image.
 	 */
-	rq = i915_request_alloc(engine, dev_priv->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e72e4087bbfe..ca4c200ee7ec 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -550,7 +550,7 @@  static void ring_retire_requests(struct intel_ring *ring)
 }
 
 static noinline struct i915_request *
-i915_request_alloc_slow(struct intel_context *ce)
+request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 {
 	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
@@ -558,6 +558,9 @@  i915_request_alloc_slow(struct intel_context *ce)
 	if (list_empty(&ring->request_list))
 		goto out;
 
+	if (!gfpflags_allow_blocking(gfp))
+		goto out;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
@@ -566,62 +569,21 @@  i915_request_alloc_slow(struct intel_context *ce)
 	ring_retire_requests(ring);
 
 out:
-	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
 struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_timeline *tl;
+	struct i915_timeline *tl = ce->ring->timeline;
 	struct i915_request *rq;
 	u32 seqno;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = intel_context_pin(ctx, engine);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	mutex_lock(&ce->ring->timeline->mutex);
-	intel_context_enter(ce);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
-		i915_request_retire(rq);
+	/* Check that the caller provided an already pinned context */
+	__intel_context_pin(ce);
 
 	/*
 	 * Beware: Dragons be flying overhead.
@@ -653,30 +615,26 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	rq = kmem_cache_alloc(global.slab_requests,
-			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = i915_request_alloc_slow(ce);
+		rq = request_alloc_slow(ce, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
 		}
 	}
 
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->execute_cb);
-
-	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
 		goto err_free;
 
-	rq->i915 = i915;
-	rq->engine = engine;
-	rq->gem_context = ctx;
+	rq->i915 = ce->engine->i915;
 	rq->hw_context = ce;
+	rq->gem_context = ce->gem_context;
+	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -696,6 +654,9 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->capture_list = NULL;
 	rq->waitboost = false;
 
+	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
+
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -708,7 +669,8 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * around inside i915_request_add() there is sufficient space at
 	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space =
+		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -718,20 +680,16 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
-	ret = engine->request_alloc(rq);
+	ret = rq->engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Check that we didn't interrupt ourselves with a new request */
-	lockdep_assert_held(&rq->timeline->mutex);
-	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
-	rq->cookie = lockdep_pin_lock(&rq->timeline->mutex);
+	/* Keep a second pin for the dual retirement along engine and ring */
+	__intel_context_pin(ce);
 
+	intel_context_mark_active(ce);
 	return rq;
 
 err_unwind:
@@ -745,12 +703,86 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
-	intel_context_exit(ce);
-	mutex_unlock(&ce->ring->timeline->mutex);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
+struct i915_request *
+i915_request_create(struct intel_context *ce)
+{
+	struct i915_request *rq;
+
+	intel_context_timeline_lock(ce);
+
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
+	    i915_request_completed(rq))
+		i915_request_retire(rq);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_KERNEL);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		goto err_unlock;
+
+	/* Check that we do not interrupt ourselves with a new request */
+	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+
+	return rq;
+
+err_unlock:
+	intel_context_timeline_unlock(ce);
+	return rq;
+}
+
+/**
+ * i915_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct i915_request *
+i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	int ret;
+
+	/*
+	 * Preempt contexts are reserved for exclusive use to inject a
+	 * preemption context switch. They are never to be used for any trivial
+	 * request!
+	 */
+	GEM_BUG_ON(ctx == i915->preempt_context);
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -1005,8 +1037,7 @@  __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &rq->i915->drm.struct_mutex);
+	prev = rcu_dereference_protected(timeline->last_request.request, 1);
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1027,6 +1058,11 @@  __i915_request_add_to_timeline(struct i915_request *rq)
 	list_add_tail(&rq->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
+	/*
+	 * Make sure that no request gazumped us - if it was allocated after
+	 * our i915_request_alloc() and called __i915_request_add() before
+	 * us, the timeline will hold its seqno which is later than ours.
+	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
 	__i915_active_request_set(&timeline->last_request, rq);
 
@@ -1038,36 +1074,23 @@  __i915_request_add_to_timeline(struct i915_request *rq)
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void i915_request_add(struct i915_request *request)
+struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_timeline *timeline = request->timeline;
-	struct intel_ring *ring = request->ring;
+	struct intel_engine_cs *engine = rq->engine;
+	struct intel_ring *ring = rq->ring;
 	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
-		  engine->name, request->fence.context, request->fence.seqno);
-
-	lockdep_assert_held(&request->timeline->mutex);
-	lockdep_unpin_lock(&request->timeline->mutex, request->cookie);
-
-	trace_i915_request_add(request);
-
-	/*
-	 * Make sure that no request gazumped us - if it was allocated after
-	 * our i915_request_alloc() and called __i915_request_add() before
-	 * us, the timeline will hold its seqno which is later than ours.
-	 */
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+		  engine->name, rq->fence.context, rq->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
-	GEM_BUG_ON(request->reserved_space > request->ring->space);
-	request->reserved_space = 0;
+	GEM_BUG_ON(rq->reserved_space > ring->space);
+	rq->reserved_space = 0;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1075,17 +1098,17 @@  void i915_request_add(struct i915_request *request)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
-	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
+	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
 	GEM_BUG_ON(IS_ERR(cs));
-	request->postfix = intel_ring_offset(request, cs);
+	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(request);
+	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&request->ring_link, &ring->request_list);
-	if (list_is_first(&request->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &request->i915->gt.active_rings);
-	request->i915->gt.active_engines |= request->engine->mask;
-	request->emitted_jiffies = jiffies;
+	list_add_tail(&rq->ring_link, &ring->request_list);
+	if (list_is_first(&rq->ring_link, &ring->request_list))
+		list_add(&ring->active_link, &rq->i915->gt.active_rings);
+	rq->i915->gt.active_engines |= rq->engine->mask;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Let the backend know a new request has arrived that may need
@@ -1101,7 +1124,7 @@  void i915_request_add(struct i915_request *request)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_sched_attr attr = rq->gem_context->sched;
 
 		/*
 		 * Boost actual workloads past semaphores!
@@ -1115,7 +1138,7 @@  void i915_request_add(struct i915_request *request)
 		 * far in the distance past over useful work, we keep a history
 		 * of any semaphore use along our dependency chain.
 		 */
-		if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
 			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
 
 		/*
@@ -1124,15 +1147,29 @@  void i915_request_add(struct i915_request *request)
 		 * Allow interactive/synchronous clients to jump ahead of
 		 * the bulk clients. (FQ_CODEL)
 		 */
-		if (list_empty(&request->sched.signalers_list))
+		if (list_empty(&rq->sched.signalers_list))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;
 
-		engine->schedule(request, &attr);
+		engine->schedule(rq, &attr);
 	}
 	rcu_read_unlock();
-	i915_sw_fence_commit(&request->submit);
+	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
+	return prev;
+}
+
+void i915_request_add(struct i915_request *rq)
+{
+	struct i915_request *prev;
+
+	lockdep_assert_held(&rq->timeline->mutex);
+	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+
+	trace_i915_request_add(rq);
+
+	prev = __i915_request_commit(rq);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
@@ -1153,7 +1190,7 @@  void i915_request_add(struct i915_request *request)
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&request->timeline->mutex);
+	mutex_unlock(&rq->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 875be6f71412..9a50072171f8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -238,6 +238,13 @@  static inline bool dma_fence_is_i915(const struct dma_fence *fence)
 	return fence->ops == &i915_fence_ops;
 }
 
+struct i915_request * __must_check
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
+struct i915_request *__i915_request_commit(struct i915_request *request);
+
 struct i915_request * __must_check
 i915_request_alloc(struct intel_engine_cs *engine,
 		   struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a882b8d42bd9..fea8c5869da9 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -235,10 +235,9 @@  static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 
 static struct i915_request *alloc_request(struct intel_overlay *overlay)
 {
-	struct drm_i915_private *dev_priv = overlay->i915;
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
+	struct intel_engine_cs *engine = overlay->i915->engine[RCS0];
 
-	return i915_request_alloc(engine, dev_priv->kernel_context);
+	return i915_request_create(engine->kernel_context);
 }
 
 /* overlay needs to be disable in OCMD reg */
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 27d8f853111b..eee838dc0634 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -46,7 +46,7 @@  static int __live_active_setup(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index e43630b40fce..046a38743152 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -202,7 +202,7 @@  static int gpu_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 12fc53c694a6..12203d665a4e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -468,7 +468,7 @@  static int make_obj_busy(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index e6ffe2240126..098d7b3aa131 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -551,8 +551,7 @@  static int live_nop_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				request = i915_request_alloc(engine,
-							     i915->kernel_context);
+				request = i915_request_create(engine->kernel_context);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
 					goto out_unlock;
@@ -649,7 +648,7 @@  empty_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 	int err;
 
-	request = i915_request_alloc(engine, engine->i915->kernel_context);
+	request = i915_request_create(engine->kernel_context);
 	if (IS_ERR(request))
 		return request;
 
@@ -853,7 +852,7 @@  static int live_all_engines(void *arg)
 	}
 
 	for_each_engine(engine, i915, id) {
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed with err=%d\n",
@@ -962,7 +961,7 @@  static int live_sequential_engines(void *arg)
 			goto out_unlock;
 		}
 
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index 8e7bcaa1eb66..227479349c61 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -454,7 +454,7 @@  tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
 		goto out;
 	}
 
-	rq = i915_request_alloc(engine, engine->i915->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		goto out_unpin;
 
@@ -678,7 +678,7 @@  static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;