[18/27] drm/i915/gt: Track timeline activeness in enter/exit

Message ID	20190726084613.22129-18-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 26 Jul 2019 09:46:04 +0100 Message-Id: <20190726084613.22129-18-chris@chris-wilson.co.uk> In-Reply-To: <20190726084613.22129-1-chris@chris-wilson.co.uk> References: <20190726084613.22129-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 18/27] drm/i915/gt: Track timeline activeness in enter/exit Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/27] drm/i915/gt: Add to timeline requires the timeline mutex \| expand [01/27] drm/i915/gt: Add to timeline requires the timeline mutex [02/27] drm/i915: Unshare the idle-barrier from other kernel requests [03/27] drm/i915/execlists: Force preemption [04/27] drm/i915: Replace hangcheck by heartbeats [05/27] drm/i915/gem: Make caps.scheduler static [06/27] drm/i915: Move aliasing_ppgtt underneath its i915_ggtt [07/27] drm/i915/gt: Provide a local intel_context.vm [08/27] drm/i915: Remove lrc default desc from GEM context [09/27] drm/i915: Push the ring creation flags to the backend [10/27] drm/i915: Flush extra hard after writing relocations through the GTT [11/27] drm/i915: Hide unshrinkable context objects from the shrinker [12/27] drm/i915/gt: Move the [class][inst] lookup for engines onto the GT [13/27] drm/i915: Introduce for_each_user_engine() [14/27] drm/i915: Use intel_engine_lookup_user for probing HAS_BSD etc [15/27] drm/i915: Isolate i915_getparam_ioctl() [16/27] drm/i915: Only include active engines in the capture state [17/27] drm/i915: Teach execbuffer to take the engine wakeref not GT [18/27] drm/i915/gt: Track timeline activeness in enter/exit [19/27] drm/i915/gt: Convert timeline tracking to spinlock [20/27] drm/i915/gt: Guard timeline pinning with its own mutex [21/27] drm/i915: Protect request retirement with timeline->mutex [22/27] drm/i915: Replace struct_mutex for batch pool serialisation [23/27] drm/i915/gt: Mark context->active_count as protected by timeline->mutex [24/27] drm/i915: Forgo last_fence active request tracking [25/27] drm/i915/overlay: Switch to using i915_active tracking [26/27] drm/i915: Extract intel_frontbuffer active tracking [27/27] drm/i915: Markup expected timeline locks for i915_active

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index a5a0aefcc04c..392a1e4fdd76 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -39,7 +39,6 @@ static void i915_gem_park(struct drm_i915_private *i915) i915_gem_batch_pool_fini(&engine->batch_pool); } - intel_timelines_park(i915); i915_vma_parked(i915); i915_globals_park(); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index eb46b7e3c1d0..2bae9bcffc1c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -261,10 +261,12 @@ int __init i915_global_context_init(void) void intel_context_enter_engine(struct intel_context *ce) { intel_engine_pm_get(ce->engine); + intel_timeline_enter(ce->ring->timeline); } void intel_context_exit_engine(struct intel_context *ce) { + intel_timeline_exit(ce->ring->timeline); intel_engine_pm_put(ce->engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 6349bb038c72..7bbc25631979 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -90,6 +90,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; + intel_timeline_enter(rq->timeline); i915_request_add_barriers(rq); __i915_request_commit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c899ce34d0d3..88e0ad93b9a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3309,6 +3309,8 @@ static void virtual_context_enter(struct intel_context *ce) for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_get(ve->siblings[n]); + + intel_timeline_enter(ce->ring->timeline); } static void virtual_context_exit(struct intel_context *ce) @@ -3316,6 +3318,8 @@ static void virtual_context_exit(struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); unsigned int n; + intel_timeline_exit(ce->ring->timeline); + for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_put(ve->siblings[n]); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 6daa9eb59e19..4af0b9801d91 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -278,64 +278,11 @@ void intel_timelines_init(struct drm_i915_private *i915) timelines_init(&i915->gt); } -static void timeline_add_to_active(struct intel_timeline *tl) -{ - struct intel_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(&gt->mutex); - list_add(&tl->link, &gt->active_list); - mutex_unlock(&gt->mutex); -} - -static void timeline_remove_from_active(struct intel_timeline *tl) -{ - struct intel_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(&gt->mutex); - list_del(&tl->link); - mutex_unlock(&gt->mutex); -} - -static void timelines_park(struct intel_gt *gt) -{ - struct intel_gt_timelines *timelines = &gt->timelines; - struct intel_timeline *timeline; - - mutex_lock(&timelines->mutex); - list_for_each_entry(timeline, &timelines->active_list, link) { - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&timeline->sync); - } - mutex_unlock(&timelines->mutex); -} - -/** - * intel_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void intel_timelines_park(struct drm_i915_private *i915) -{ - timelines_park(&i915->gt); -} - void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - i915_syncmap_free(&timeline->sync); - if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); else @@ -370,6 +317,7 @@ int intel_timeline_pin(struct intel_timeline *tl) if (tl->pin_count++) return 0; GEM_BUG_ON(!tl->pin_count); + GEM_BUG_ON(tl->active_count); err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -380,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl) offset_in_page(tl->hwsp_offset); cacheline_acquire(tl->hwsp_cacheline); - timeline_add_to_active(tl); return 0; @@ -389,6 +336,40 @@ int intel_timeline_pin(struct intel_timeline *tl) return err; } +void intel_timeline_enter(struct intel_timeline *tl) +{ + struct intel_gt_timelines *timelines = &tl->gt->timelines; + + GEM_BUG_ON(!tl->pin_count); + if (tl->active_count++) + return; + GEM_BUG_ON(!tl->active_count); /* overflow? */ + + mutex_lock(&timelines->mutex); + list_add(&tl->link, &timelines->active_list); + mutex_unlock(&timelines->mutex); +} + +void intel_timeline_exit(struct intel_timeline *tl) +{ + struct intel_gt_timelines *timelines = &tl->gt->timelines; + + GEM_BUG_ON(!tl->active_count); + if (--tl->active_count) + return; + + mutex_lock(&timelines->mutex); + list_del(&tl->link); + mutex_unlock(&timelines->mutex); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); +} + static u32 timeline_advance(struct intel_timeline *tl) { GEM_BUG_ON(!tl->pin_count); @@ -546,16 +527,9 @@ void intel_timeline_unpin(struct intel_timeline *tl) if (--tl->pin_count) return; - timeline_remove_from_active(tl); + GEM_BUG_ON(tl->active_count); cacheline_release(tl->hwsp_cacheline); - /* - * Since this timeline is idle, all bariers upon which we were waiting - * must also be complete and so we can discard the last used barriers - * without loss of information. - */ - i915_syncmap_free(&tl->sync); - __i915_vma_unpin(tl->hwsp_ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index e08cebf64833..f583af1ba18d 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -77,9 +77,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, } int intel_timeline_pin(struct intel_timeline *tl); +void intel_timeline_enter(struct intel_timeline *tl); int intel_timeline_get_seqno(struct intel_timeline *tl, struct i915_request *rq, u32 *seqno); +void intel_timeline_exit(struct intel_timeline *tl); void intel_timeline_unpin(struct intel_timeline *tl); int intel_timeline_read_hwsp(struct i915_request *from, @@ -87,7 +89,6 @@ int intel_timeline_read_hwsp(struct i915_request *from, u32 *hwsp_offset); void intel_timelines_init(struct drm_i915_private *i915); -void intel_timelines_park(struct drm_i915_private *i915); void intel_timelines_fini(struct drm_i915_private *i915); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 9a71aea7a338..b1a9f0c54bf0 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -25,7 +25,25 @@ struct intel_timeline { struct mutex mutex; /* protects the flow of requests */ + /* + * pin_count and active_count track essentially the same thing: + * How many requests are in flight or may be under construction. + * + * We need two distinct counters so that we can assign different + * lifetimes to the events for different use-cases. For example, + * we want to permanently keep the timeline pinned for the kernel + * context so that we can issue requests at any time without having + * to acquire space in the GGTT. However, we want to keep tracking + * the activity (to be able to detect when we become idle) along that + * permanently pinned timeline and so end up requiring two counters. + * + * Note that the active_count is protected by the intel_timeline.mutex, + * but the pin_count is protected by a combination of serialisation + * from the intel_context caller plus internal atomicity. + */ unsigned int pin_count; + unsigned int active_count; + const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; u32 hwsp_offset; diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index f0a840030382..d54113697745 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -816,8 +816,6 @@ static int live_hwsp_recycle(void *arg) if (err) goto out; - - intel_timelines_park(i915); /* Encourage recycling! */ } while (!__igt_timeout(end_time, NULL)); }

[18/27] drm/i915/gt: Track timeline activeness in enter/exit

Commit Message

Patch