[21/27] drm/i915: Protect request retirement with timeline->mutex

Message ID	20190726084613.22129-21-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 26 Jul 2019 09:46:07 +0100 Message-Id: <20190726084613.22129-21-chris@chris-wilson.co.uk> In-Reply-To: <20190726084613.22129-1-chris@chris-wilson.co.uk> References: <20190726084613.22129-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 21/27] drm/i915: Protect request retirement with timeline->mutex Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/27] drm/i915/gt: Add to timeline requires the timeline mutex | expand [01/27] drm/i915/gt: Add to timeline requires the timeline mutex [02/27] drm/i915: Unshare the idle-barrier from other kernel requests [03/27] drm/i915/execlists: Force preemption [04/27] drm/i915: Replace hangcheck by heartbeats [05/27] drm/i915/gem: Make caps.scheduler static [06/27] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt [07/27] drm/i915/gt: Provide a local intel_context.vm [08/27] drm/i915: Remove lrc default desc from GEM context [09/27] drm/i915: Push the ring creation flags to the backend [10/27] drm/i915: Flush extra hard after writing relocations through the GTT [11/27] drm/i915: Hide unshrinkable context objects from the shrinker [12/27] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT [13/27] drm/i915: Introduce for_each_user_engine() [14/27] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc [15/27] drm/i915: Isolate i915_getparam_ioctl() [16/27] drm/i915: Only include active engines in the capture state [17/27] drm/i915: Teach execbuffer to take the engine wakeref not GT [18/27] drm/i915/gt: Track timeline activeness in enter/exit [19/27] drm/i915/gt: Convert timeline tracking to spinlock [20/27] drm/i915/gt: Guard timeline pinning with its own mutex [21/27] drm/i915: Protect request retirement with timeline->mutex [22/27] drm/i915: Replace struct_mutex for batch pool serialisation [23/27] drm/i915/gt: Mark context->active_count as protected by timeline->mutex [24/27] drm/i915: Forgo last_fence active request tracking [25/27] drm/i915/overlay: Switch to using i915_active tracking [26/27] drm/i915: Extract intel_frontbuffer active tracking [27/27] drm/i915: Markup expected timeline locks for i915_active

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 8d90498eaf46..44add172cdc8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -734,63 +734,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request that after waiting upon, there will be at least half - * the ring available. The hysteresis allows us to compete for the - * shared ring and should mean that we sleep less often prior to - * claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. - */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned. 
- */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; @@ -2118,8 +2061,73 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_throttle(struct intel_context *ce) { + struct intel_ring *ring = ce->ring; + struct intel_timeline *tl = ring->timeline; + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. 
+ */ + list_for_each_entry(rq, &tl->requests, link) { + if (rq->ring != ring) + continue; + + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->link == &tl->requests) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int +__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + int err; + + if (likely(atomic_inc_not_zero(&ce->pin_count))) + return 0; + + err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); + if (err) + return err; + + err = __intel_context_do_pin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); + + return err; +} + +static void +__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + return; + + mutex_lock(&eb->i915->drm.struct_mutex); + intel_context_unpin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); +} + +static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +{ + struct intel_timeline *tl; + struct i915_request *rq; int err; /* @@ -2135,7 +2143,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = intel_context_pin(ce); + err = __eb_pin_context(eb, ce); if (err) return err; @@ -2147,23 +2155,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) * until the timeline is idle, which in turn releases the wakeref * taken on the engine, and the parent device. 
*/ - err = intel_context_timeline_lock(ce); - if (err) + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); goto err_unpin; + } intel_context_enter(ce); - intel_context_timeline_unlock(ce); + rq = eb_throttle(ce); + + intel_context_timeline_unlock(tl); + + if (rq) { + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + err = -EINTR; + goto err_exit; + } + + i915_request_put(rq); + } eb->engine = ce->engine; eb->context = ce; return 0; +err_exit: + mutex_lock(&tl->mutex); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); err_unpin: - intel_context_unpin(ce); + __eb_unpin_context(eb, ce); return err; } -static void eb_unpin_context(struct i915_execbuffer *eb) +static void eb_unpin_engine(struct i915_execbuffer *eb) { struct intel_context *ce = eb->context; struct intel_timeline *tl = ce->ring->timeline; @@ -2172,7 +2200,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - intel_context_unpin(ce); + __eb_unpin_context(eb, ce); } static unsigned int @@ -2217,9 +2245,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_pin_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct intel_context *ce; unsigned int idx; @@ -2234,7 +2262,7 @@ eb_select_engine(struct i915_execbuffer *eb, if (IS_ERR(ce)) return PTR_ERR(ce); - err = eb_pin_context(eb, ce); + err = __eb_pin_engine(eb, ce); intel_context_put(ce); return err; @@ -2452,16 +2480,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_context; - - err = eb_select_engine(&eb, file, args); + err = eb_pin_engine(&eb, file, args); if (unlikely(err)) - goto err_unlock; + goto 
err_context; - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) + err = i915_mutex_lock_interruptible(dev); + if (err) goto err_engine; err = eb_relocate(&eb); @@ -2615,10 +2639,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); -err_unlock: mutex_unlock(&dev->struct_mutex); +err_engine: + eb_unpin_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); err_destroy: diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 13f28dd316bc..cb63fc7b1b18 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, @@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } -static inline int __must_check +static inline struct intel_timeline *__must_check intel_context_timeline_lock(struct intel_context *ce) __acquires(&ce->ring->timeline->mutex) { - return mutex_lock_interruptible(&ce->ring->timeline->mutex); + struct intel_timeline *tl = ce->ring->timeline; + int err; + + err = mutex_lock_interruptible(&tl->mutex); + if (err) + return ERR_PTR(err); + + return tl; } -static inline void intel_context_timeline_unlock(struct intel_context *ce) - __releases(&ce->ring->timeline->mutex) +static inline void intel_context_timeline_unlock(struct intel_timeline *tl) + __releases(&tl->mutex) { - mutex_unlock(&ce->ring->timeline->mutex); + mutex_unlock(&tl->mutex); } int intel_context_prepare_remote_request(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 095ddcbd75bf..7a7e385bb979 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -744,7 +744,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) engine->status_page.vma)) goto out_frame; - INIT_LIST_HEAD(&frame->ring.request_list); frame->ring.timeline = &frame->timeline; frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 42330b1074e6..281a72af8406 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -24,6 +24,7 @@ static void heartbeat(struct work_struct *wrk) struct intel_engine_cs *engine = container_of(wrk, typeof(*engine), heartbeat.work); struct intel_context *ce = engine->kernel_context; + struct intel_timeline *tl; struct i915_request *rq; if (!intel_engine_pm_get_if_awake(engine)) @@ -57,7 +58,8 @@ static void heartbeat(struct work_struct *wrk) if (engine->wakeref_serial == engine->serial) goto out; - if (intel_context_timeline_lock(ce)) + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) goto out; intel_context_enter(ce); @@ -73,7 +75,7 @@ static void heartbeat(struct work_struct *wrk) __i915_request_commit(rq); unlock: - intel_context_timeline_unlock(ce); + intel_context_timeline_unlock(tl); out: schedule_delayed_work(&engine->heartbeat, delay()); intel_engine_pm_put(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0ef07ada0ae7..d8b82b6f7db9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -63,8 +63,6 @@ struct intel_ring { void *vaddr; struct intel_timeline *timeline; - struct list_head request_list; - struct list_head active_link; /* * As we have two types of rings, one global to the engine used diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c 
index 117ca64149e1..c2a25c7adb46 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -14,7 +14,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) gt->i915 = i915; gt->uncore = &i915->uncore; - INIT_LIST_HEAD(&gt->active_rings); INIT_LIST_HEAD(&gt->closed_vma); spin_lock_init(&gt->closed_lock); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index b097d83fe112..70b8f025e4da 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -40,8 +40,6 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; - struct list_head active_rings; - struct intel_wakeref wakeref; struct list_head closed_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 88e0ad93b9a9..2be97d68a103 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1634,6 +1634,7 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); + intel_ring_reset(ce->ring, ce->ring->tail); } static void diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index d8efb88f33f3..9d9be5fed9fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1275,7 +1275,7 @@ void intel_ring_unpin(struct intel_ring *ring) GEM_TRACE("ring:%llx unpin\n", ring->timeline->fence_context); /* Discard any unused bytes beyond that submitted to hw. 
*/ - intel_ring_reset(ring, ring->tail); + intel_ring_reset(ring, ring->emit); i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) @@ -1340,7 +1340,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, return ERR_PTR(-ENOMEM); kref_init(&ring->ref); - INIT_LIST_HEAD(&ring->request_list); ring->timeline = intel_timeline_get(timeline); ring->size = size; @@ -1888,21 +1887,25 @@ static int ring_request_alloc(struct i915_request *request) static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { + struct intel_timeline *tl = ring->timeline; struct i915_request *target; long timeout; if (intel_ring_update_space(ring) >= bytes) return 0; - GEM_BUG_ON(list_empty(&ring->request_list)); - list_for_each_entry(target, &ring->request_list, ring_link) { + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + /* Would completion of this request free enough space? */ if (bytes <= __intel_ring_space(target->postfix, ring->emit, ring->size)) break; } - if (WARN_ON(&target->ring_link == &ring->request_list)) + if (GEM_WARN_ON(&target->link == &tl->requests)) return -ENOSPC; timeout = i915_request_wait(target, diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 284f2e5ad2cf..3ed511e8e098 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -68,7 +68,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.timeline = &ring->timeline; atomic_set(&ring->base.pin_count, 1); - INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base); return &ring->base; diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index e3b45fe747ae..6dd1dfda3c5a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -20,10 +20,13 @@ static int 
request_sync(struct i915_request *rq) i915_request_add(rq); timeout = i915_request_wait(rq, 0, HZ / 10); - if (timeout < 0) + if (timeout < 0) { err = timeout; - else + } else { + mutex_lock(&rq->timeline->mutex); i915_request_retire_upto(rq); + mutex_unlock(&rq->timeline->mutex); + } i915_request_put(rq); @@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce) struct intel_timeline *tl = ce->ring->timeline; int err = 0; + mutex_lock(&tl->mutex); do { struct i915_request *rq; long timeout; @@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce) i915_request_put(rq); } while (!err); + mutex_unlock(&tl->mutex); return err; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 81094f250bdb..d203763b7d86 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -180,40 +180,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void advance_ring(struct i915_request *request) -{ - struct intel_ring *ring = request->ring; - unsigned int tail; - - /* - * We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); - if (list_is_last(&request->ring_link, &ring->request_list)) { - /* - * We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. 
- */ - tail = READ_ONCE(request->tail); - list_del(&ring->active_link); - } else { - tail = request->postfix; - } - list_del_init(&request->ring_link); - - ring->head = tail; -} - static void free_capture_list(struct i915_request *request) { struct i915_capture_list *capture; @@ -231,7 +197,7 @@ static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&rq->timeline->mutex); if (!i915_request_completed(rq)) return false; @@ -243,7 +209,17 @@ static bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); - advance_ring(rq); + /* + * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests)); + rq->ring->head = rq->postfix; /* * Walk through the active list, calling retire on each. 
This allows @@ -320,7 +296,7 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_ring *ring = rq->ring; + struct intel_timeline * const tl = rq->timeline; struct i915_request *tmp; GEM_TRACE("%s fence %llx:%lld, current %d\n", @@ -328,15 +304,11 @@ void i915_request_retire_upto(struct i915_request *rq) rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&tl->mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->ring_link)) - return; - do { - tmp = list_first_entry(&ring->request_list, - typeof(*tmp), ring_link); + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); } while (i915_request_retire(tmp) && tmp != rq); } @@ -563,29 +535,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void ring_retire_requests(struct intel_ring *ring) +static void retire_requests(struct intel_timeline *tl) { struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) + list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) break; } static noinline struct i915_request * -request_alloc_slow(struct intel_context *ce, gfp_t gfp) +request_alloc_slow(struct intel_timeline *tl, gfp_t gfp) { - struct intel_ring *ring = ce->ring; struct i915_request *rq; - if (list_empty(&ring->request_list)) + if (list_empty(&tl->requests)) goto out; if (!gfpflags_allow_blocking(gfp)) goto out; /* Move our oldest request to the slab-cache (if not in use!) 
*/ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); rq = kmem_cache_alloc(global.slab_requests, @@ -594,11 +565,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp) return rq; /* Ratelimit ourselves to prevent oom from malicious clients */ - rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_last_entry(&tl->requests, typeof(*rq), link); cond_synchronize_rcu(rq->rcustate); /* Retire our old requests in the hope that we free some */ - ring_retire_requests(ring); + retire_requests(tl); out: return kmem_cache_alloc(global.slab_requests, gfp); @@ -649,7 +620,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq = kmem_cache_alloc(global.slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = request_alloc_slow(ce, gfp); + rq = request_alloc_slow(tl, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; @@ -741,15 +712,15 @@ struct i915_request * i915_request_create(struct intel_context *ce) { struct i915_request *rq; - int err; + struct intel_timeline *tl; - err = intel_context_timeline_lock(ce); - if (err) - return ERR_PTR(err); + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return ERR_CAST(tl); /* Move our oldest request to the slab-cache (if not in use!) 
*/ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list)) + rq = list_first_entry(&tl->requests, typeof(*rq), link); + if (!list_is_last(&rq->link, &tl->requests)) i915_request_retire(rq); intel_context_enter(ce); @@ -759,22 +730,22 @@ i915_request_create(struct intel_context *ce) goto err_unlock; /* Check that we do not interrupt ourselves with a new request */ - rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); return rq; err_unlock: - intel_context_timeline_unlock(ce); + intel_context_timeline_unlock(tl); return rq; } static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->ring_link, &signal->ring->request_list)) + if (list_is_first(&signal->link, &signal->ring->timeline->requests)) return 0; - signal = list_prev_entry(signal, ring_link); + signal = list_prev_entry(signal, link); if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) return 0; @@ -1167,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) */ GEM_BUG_ON(rq->reserved_space > ring->space); rq->reserved_space = 0; + rq->emitted_jiffies = jiffies; /* * Record the position of the start of the breadcrumb so that @@ -1180,11 +1152,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) prev = __i915_request_add_to_timeline(rq); - list_add_tail(&rq->ring_link, &ring->request_list); - if (list_is_first(&rq->ring_link, &ring->request_list)) - list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->emitted_jiffies = jiffies; - /* * Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -1235,10 +1202,11 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) void i915_request_add(struct i915_request *rq) { + struct intel_timeline * const tl = rq->timeline; 
struct i915_request *prev; - lockdep_assert_held(&rq->timeline->mutex); - lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + lockdep_assert_held(&tl->mutex); + lockdep_unpin_lock(&tl->mutex, rq->cookie); trace_i915_request_add(rq); @@ -1261,10 +1229,10 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) */ - if (prev && i915_request_completed(prev)) + if (prev && i915_request_completed(prev) && prev->timeline == tl) i915_request_retire_upto(prev); - mutex_unlock(&rq->timeline->mutex); + mutex_unlock(&tl->mutex); } static unsigned long local_clock_us(unsigned int *cpu) @@ -1484,18 +1452,43 @@ long i915_request_wait(struct i915_request *rq, bool i915_retire_requests(struct drm_i915_private *i915) { - struct intel_ring *ring, *tmp; + struct intel_gt_timelines *timelines = &i915->gt.timelines; + struct intel_timeline *tl, *tn; + LIST_HEAD(free); + + spin_lock(&timelines->lock); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) + continue; + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock(&timelines->lock); - lockdep_assert_held(&i915->drm.struct_mutex); + retire_requests(tl); - list_for_each_entry_safe(ring, tmp, - &i915->gt.active_rings, active_link) { - intel_ring_get(ring); /* last rq holds reference! 
*/ - ring_retire_requests(ring); - intel_ring_put(ring); + spin_lock(&timelines->lock); + + /* Restart iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (!--tl->active_count) + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } } + spin_unlock(&timelines->lock); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); - return !list_empty(&i915->gt.active_rings); + return !list_empty(&timelines->active_list); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 313df3c37158..22e506e960e0 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -223,9 +223,6 @@ struct i915_request { /** timeline->request entry for this request */ struct list_head link; - /** ring->request_list entry for this request */ - struct list_head ring_link; - struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_link;

[21/27] drm/i915: Protect request retirement with timeline->mutex

Commit Message

Patch