
[24/46] drm/i915: Do a synchronous switch-to-kernel-context on idling

Message ID 20190206130356.18771-25-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/46] drm/i915: Hack and slash, throttle execbuffer hogs

Commit Message

Chris Wilson Feb. 6, 2019, 1:03 p.m. UTC
When the system idles, we switch to the kernel context as a defensive
measure (no users are harmed if the kernel context is lost). Currently,
we issue a switch to kernel context and then come back later to see if
the kernel context is still current and the system is idle. However,
if we are no longer privy to the runqueue ordering, then we have to
relax our assumptions about the logical state of the GPU: the only way
to ensure that the kernel context is currently loaded is to issue a
request to run after all others and wait for it to complete, all while
preventing anyone else from issuing their own requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c           |  14 +--
 drivers/gpu/drm/i915/i915_drv.h           |   2 +-
 drivers/gpu/drm/i915/i915_gem.c           | 143 ++++++++--------------
 drivers/gpu/drm/i915/i915_gem_context.c   |   4 +
 drivers/gpu/drm/i915/selftests/i915_gem.c |   9 +-
 5 files changed, 65 insertions(+), 107 deletions(-)
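
For quick reference, the heart of the patch is the new helper added to
i915_gem.c (shown in full in the diff at the bottom of this page), which
folds the switch-to-kernel-context and the idle wait into one synchronous
step. The version below is the same code with explanatory comments added:

static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
{
	/* Emit a switch-to-kernel-context request on every engine. */
	if (i915_gem_switch_to_kernel_context(i915))
		return false;

	/*
	 * Wait, under struct_mutex, for those requests (and everything
	 * queued before them) to retire before declaring the GPU idle.
	 */
	if (i915_gem_wait_for_idle(i915,
				   I915_WAIT_LOCKED |
				   I915_WAIT_FOR_IDLE_BOOST,
				   HZ / 10))
		return false;

	assert_kernel_context_is_current(i915);
	return true;
}

Callers that must end up idle (the idle worker, i915_gem_suspend() and
__intel_engines_record_defaults()) treat a false return as grounds for
declaring the GPU wedged.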

Comments

Daniele Ceraolo Spurio Feb. 21, 2019, 7:48 p.m. UTC | #1
<snip>

> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	if (!i915_terminally_wedged(&i915->gpu_error)) {
> -		ret = i915_gem_switch_to_kernel_context(i915);
> -		if (ret)
> -			goto err_unlock;
> -
> -		ret = i915_gem_wait_for_idle(i915,
> -					     I915_WAIT_INTERRUPTIBLE |
> -					     I915_WAIT_LOCKED |
> -					     I915_WAIT_FOR_IDLE_BOOST,
> -					     HZ / 5);
> -		if (ret == -EINTR)
> -			goto err_unlock;
> -
> +	if (!switch_to_kernel_context_sync(i915)) {
>   		/* Forcibly cancel outstanding work and leave the gpu quiet. */
>   		i915_gem_set_wedged(i915);
>   	}

GuC-related question: what's your expectation here with regard to GuC 
status? The current i915 flow expects either uc_reset_prepare() or 
uc_suspend() to be called to clean up the GuC status, but we're calling 
neither of them here if the switch is successful. Do you expect the 
resume code to always blank out the GuC status before a reload?

Thanks,
Daniele
Chris Wilson Feb. 21, 2019, 9:17 p.m. UTC | #2
Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
> 
> <snip>
> 
> > @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
> >        * state. Fortunately, the kernel_context is disposable and we do
> >        * not rely on its state.
> >        */
> > -     if (!i915_terminally_wedged(&i915->gpu_error)) {
> > -             ret = i915_gem_switch_to_kernel_context(i915);
> > -             if (ret)
> > -                     goto err_unlock;
> > -
> > -             ret = i915_gem_wait_for_idle(i915,
> > -                                          I915_WAIT_INTERRUPTIBLE |
> > -                                          I915_WAIT_LOCKED |
> > -                                          I915_WAIT_FOR_IDLE_BOOST,
> > -                                          HZ / 5);
> > -             if (ret == -EINTR)
> > -                     goto err_unlock;
> > -
> > +     if (!switch_to_kernel_context_sync(i915)) {
> >                   /* Forcibly cancel outstanding work and leave the gpu quiet. */
> >               i915_gem_set_wedged(i915);
> >       }
> 
> GuC-related question: what's your expectation here in regards to GuC 
> status? The current i915 flow expect either uc_reset_prepare() or 
> uc_suspend() to be called to clean up the guc status, but we're calling 
> neither of them here if the switch is successful. Do you expect the 
> resume code to always blank out the GuC status before a reload?

(A few patches later on I propose that we always just do a reset+wedge
on suspend in lieu of hangcheck.)

On resume, we have to bring the HW up from scratch and do another reset
in the process. Some platforms have been known to survive the trips to
PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
clear the HW state. I expect we would need to force a reset on resume
even for the guc, to be sure we cover all cases such as kexec.
-Chris
Daniele Ceraolo Spurio Feb. 21, 2019, 9:31 p.m. UTC | #3
On 2/21/19 1:17 PM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
>>
>> <snip>
>>
>>> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>>>         * state. Fortunately, the kernel_context is disposable and we do
>>>         * not rely on its state.
>>>         */
>>> -     if (!i915_terminally_wedged(&i915->gpu_error)) {
>>> -             ret = i915_gem_switch_to_kernel_context(i915);
>>> -             if (ret)
>>> -                     goto err_unlock;
>>> -
>>> -             ret = i915_gem_wait_for_idle(i915,
>>> -                                          I915_WAIT_INTERRUPTIBLE |
>>> -                                          I915_WAIT_LOCKED |
>>> -                                          I915_WAIT_FOR_IDLE_BOOST,
>>> -                                          HZ / 5);
>>> -             if (ret == -EINTR)
>>> -                     goto err_unlock;
>>> -
>>> +     if (!switch_to_kernel_context_sync(i915)) {
>>>                /* Forcibly cancel outstanding work and leave the gpu quiet. */
>>>                i915_gem_set_wedged(i915);
>>>        }
>>
>> GuC-related question: what's your expectation here in regards to GuC
>> status? The current i915 flow expect either uc_reset_prepare() or
>> uc_suspend() to be called to clean up the guc status, but we're calling
>> neither of them here if the switch is successful. Do you expect the
>> resume code to always blank out the GuC status before a reload?
> 
> (A few patches later on I propose that we always just do a reset+wedge
> on suspend in lieu of hangcheck.)
> 
> On resume, we have to bring the HW up from scratch and do another reset
> in the process. Some platforms have been known to survive the trips to
> PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
> clear the HW state. I expect we would need to force a reset on resume
> even for the guc, to be sure we cover all cases such as kexec.
> -Chris
> 
My question here was less about the HW state and more about the SW 
tracking. At which point do we go and stop guc communication and mark 
guc as not loaded/accessible? e.g. we need to disable and re-enable CT 
buffers before GuC is reset/suspended to make sure the shared memory 
area is cleaned correctly (we currently avoid memsetting all of it on 
reload since it is quite big). Also, communication with GuC is going to 
increase going forward, so we'll need to make sure we accurately track 
its state and do all the relevant cleanups.

Daniele
Chris Wilson Feb. 21, 2019, 9:42 p.m. UTC | #4
Quoting Daniele Ceraolo Spurio (2019-02-21 21:31:45)
> 
> 
> On 2/21/19 1:17 PM, Chris Wilson wrote:
> > Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
> >>
> >> <snip>
> >>
> >>> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
> >>>         * state. Fortunately, the kernel_context is disposable and we do
> >>>         * not rely on its state.
> >>>         */
> >>> -     if (!i915_terminally_wedged(&i915->gpu_error)) {
> >>> -             ret = i915_gem_switch_to_kernel_context(i915);
> >>> -             if (ret)
> >>> -                     goto err_unlock;
> >>> -
> >>> -             ret = i915_gem_wait_for_idle(i915,
> >>> -                                          I915_WAIT_INTERRUPTIBLE |
> >>> -                                          I915_WAIT_LOCKED |
> >>> -                                          I915_WAIT_FOR_IDLE_BOOST,
> >>> -                                          HZ / 5);
> >>> -             if (ret == -EINTR)
> >>> -                     goto err_unlock;
> >>> -
> >>> +     if (!switch_to_kernel_context_sync(i915)) {
> >>>                /* Forcibly cancel outstanding work and leave the gpu quiet. */
> >>>                i915_gem_set_wedged(i915);
> >>>        }
> >>
> >> GuC-related question: what's your expectation here in regards to GuC
> >> status? The current i915 flow expect either uc_reset_prepare() or
> >> uc_suspend() to be called to clean up the guc status, but we're calling
> >> neither of them here if the switch is successful. Do you expect the
> >> resume code to always blank out the GuC status before a reload?
> > 
> > (A few patches later on I propose that we always just do a reset+wedge
> > on suspend in lieu of hangcheck.)
> > 
> > On resume, we have to bring the HW up from scratch and do another reset
> > in the process. Some platforms have been known to survive the trips to
> > PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
> > clear the HW state. I expect we would need to force a reset on resume
> > even for the guc, to be sure we cover all cases such as kexec.
> > -Chris
> > 
> More than about the HW state, my question here was about the SW 
> tracking. At which point do we go and stop guc communication and mark 
> guc as not loaded/accessible? e.g. we need to disable and re-enable CT 
> buffers before GuC is reset/suspended to make sure the shared memory 
> area is cleaned correctly (we currently avoid memsetting all of it on 
> reload since it is quite big). Also, communication with GuC is going to 
> increase going forward, so we'll need to make sure we accurately track 
> its state and do all the relevant cleanups.

Across suspend/resume, we issue a couple of resets and scrub/sanitize our
state tracking. By the time we load the fw again, both the fw and our
state should be starting from scratch.

That all seems unavoidable, so I am not understanding the essence of
your question.
-Chris
Daniele Ceraolo Spurio Feb. 21, 2019, 10:53 p.m. UTC | #5
On 2/21/19 1:42 PM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-02-21 21:31:45)
>>
>>
>> On 2/21/19 1:17 PM, Chris Wilson wrote:
>>> Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
>>>>
>>>> <snip>
>>>>
>>>>> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>>>>>          * state. Fortunately, the kernel_context is disposable and we do
>>>>>          * not rely on its state.
>>>>>          */
>>>>> -     if (!i915_terminally_wedged(&i915->gpu_error)) {
>>>>> -             ret = i915_gem_switch_to_kernel_context(i915);
>>>>> -             if (ret)
>>>>> -                     goto err_unlock;
>>>>> -
>>>>> -             ret = i915_gem_wait_for_idle(i915,
>>>>> -                                          I915_WAIT_INTERRUPTIBLE |
>>>>> -                                          I915_WAIT_LOCKED |
>>>>> -                                          I915_WAIT_FOR_IDLE_BOOST,
>>>>> -                                          HZ / 5);
>>>>> -             if (ret == -EINTR)
>>>>> -                     goto err_unlock;
>>>>> -
>>>>> +     if (!switch_to_kernel_context_sync(i915)) {
>>>>>                 /* Forcibly cancel outstanding work and leave the gpu quiet. */
>>>>>                 i915_gem_set_wedged(i915);
>>>>>         }
>>>>
>>>> GuC-related question: what's your expectation here in regards to GuC
>>>> status? The current i915 flow expect either uc_reset_prepare() or
>>>> uc_suspend() to be called to clean up the guc status, but we're calling
>>>> neither of them here if the switch is successful. Do you expect the
>>>> resume code to always blank out the GuC status before a reload?
>>>
>>> (A few patches later on I propose that we always just do a reset+wedge
>>> on suspend in lieu of hangcheck.)
>>>
>>> On resume, we have to bring the HW up from scratch and do another reset
>>> in the process. Some platforms have been known to survive the trips to
>>> PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
>>> clear the HW state. I expect we would need to force a reset on resume
>>> even for the guc, to be sure we cover all cases such as kexec.
>>> -Chris
>>>
>> More than about the HW state, my question here was about the SW
>> tracking. At which point do we go and stop guc communication and mark
>> guc as not loaded/accessible? e.g. we need to disable and re-enable CT
>> buffers before GuC is reset/suspended to make sure the shared memory
>> area is cleaned correctly (we currently avoid memsetting all of it on
>> reload since it is quite big). Also, communication with GuC is going to
>> increase going forward, so we'll need to make sure we accurately track
>> its state and do all the relevant cleanups.
> 
> Across suspend/resume, we issue a couple of resets and scrub/sanitize our
> state tracking. By the time we load the fw again, both the fw and our
> state should be starting from scratch.
> 
> That all seems unavoidable, so I am not understanding the essence of
> your question.
> -Chris
> 

We're not doing the state scrubbing for guc in all paths at the moment. 
There is logic in gem_suspend_late(), but that doesn't seem to be called 
on all paths; e.g. it isn't when we run 
igt@gem_exec_suspend@basic-s4-devices, and that's why Suja's patch moved 
the disabling of communication from uc_sanitize to uc_suspend. The guc 
resume code also doesn't currently clean everything as some of the 
structures (including stuff we allocate for guc usage) are carried over. 
We can either add something more in the cleanup path or go and rework 
the resume to blank everything (which would be time consuming since 
there are tens of MBs involved), but before putting down any code one way 
or another I wanted to understand what the expectation is.

Thanks,
Daniele
Chris Wilson Feb. 21, 2019, 11:25 p.m. UTC | #6
Quoting Daniele Ceraolo Spurio (2019-02-21 22:53:41)
> 
> 
> On 2/21/19 1:42 PM, Chris Wilson wrote:
> > Quoting Daniele Ceraolo Spurio (2019-02-21 21:31:45)
> >>
> >>
> >> On 2/21/19 1:17 PM, Chris Wilson wrote:
> >>> Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
> >>>>
> >>>> <snip>
> >>>>
> >>>>> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
> >>>>>          * state. Fortunately, the kernel_context is disposable and we do
> >>>>>          * not rely on its state.
> >>>>>          */
> >>>>> -     if (!i915_terminally_wedged(&i915->gpu_error)) {
> >>>>> -             ret = i915_gem_switch_to_kernel_context(i915);
> >>>>> -             if (ret)
> >>>>> -                     goto err_unlock;
> >>>>> -
> >>>>> -             ret = i915_gem_wait_for_idle(i915,
> >>>>> -                                          I915_WAIT_INTERRUPTIBLE |
> >>>>> -                                          I915_WAIT_LOCKED |
> >>>>> -                                          I915_WAIT_FOR_IDLE_BOOST,
> >>>>> -                                          HZ / 5);
> >>>>> -             if (ret == -EINTR)
> >>>>> -                     goto err_unlock;
> >>>>> -
> >>>>> +     if (!switch_to_kernel_context_sync(i915)) {
> >>>>>                 /* Forcibly cancel outstanding work and leave the gpu quiet. */
> >>>>>                 i915_gem_set_wedged(i915);
> >>>>>         }
> >>>>
> >>>> GuC-related question: what's your expectation here in regards to GuC
> >>>> status? The current i915 flow expect either uc_reset_prepare() or
> >>>> uc_suspend() to be called to clean up the guc status, but we're calling
> >>>> neither of them here if the switch is successful. Do you expect the
> >>>> resume code to always blank out the GuC status before a reload?
> >>>
> >>> (A few patches later on I propose that we always just do a reset+wedge
> >>> on suspend in lieu of hangcheck.)
> >>>
> >>> On resume, we have to bring the HW up from scratch and do another reset
> >>> in the process. Some platforms have been known to survive the trips to
> >>> PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
> >>> clear the HW state. I expect we would need to force a reset on resume
> >>> even for the guc, to be sure we cover all cases such as kexec.
> >>> -Chris
> >>>
> >> More than about the HW state, my question here was about the SW
> >> tracking. At which point do we go and stop guc communication and mark
> >> guc as not loaded/accessible? e.g. we need to disable and re-enable CT
> >> buffers before GuC is reset/suspended to make sure the shared memory
> >> area is cleaned correctly (we currently avoid memsetting all of it on
> >> reload since it is quite big). Also, communication with GuC is going to
> >> increase going forward, so we'll need to make sure we accurately track
> >> its state and do all the relevant cleanups.
> > 
> > Across suspend/resume, we issue a couple of resets and scrub/sanitize our
> > state tracking. By the time we load the fw again, both the fw and our
> > state should be starting from scratch.
> > 
> > That all seems unavoidable, so I am not understanding the essence of
> > your question.
> > -Chris
> > 
> 
> We're not doing the state scrubbing for guc in all paths at the moment. 
> There is logic in gem_suspend_late(), but that doesn't seem to be called 
> on all paths; e.g. it isn't when we run 
> igt@gem_exec_suspend@basic-s4-devices

Yup, the dummy hibernate code throws a few surprises, which is why
i915_gem_sanitize is so fiddly to get right between that and
gem_eio/suspend.

> and that's why Suja's patch moved 
> the disabling of communication from uc_sanitize to uc_suspend.

That should also help as previously it tried to talk to the guc after we
reset it.

> The guc 
> resume code also doesn't currently clean everything as some of the 
> structures (including stuff we allocate for guc usage) are carried over. 
> We can either add something more in the cleanup path or go and rework 
> the resume to blank everything (which would be time consuming since 
> there is tens of MBs involved), but before putting down any code one way 
> or another I wanted to understand what the expectation is.

I may be naive, but my expectation is that we just have to reset the
comm ringbuffer pointers. We shouldn't need to hand the guc pristine
pages; it will zero on allocate when it needs to, surely? We do have to
rebuild the set of clients every time we load the guc, so that can't be
the issue (as that has to be done on resume, device reset etc today),
although that should only have to be the pinned clients?

We have to restart the comm channels on loading the guc, so what's
changing?

On suspend, hit the device reset & kill guc. On resume, load the guc fw,
restart comm. After fiddling about making sure we are in the right
callpaths, the intent is that resume just looks like a fresh module
load (so we only have to reason about init sequence [nearly] once).
-Chris
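
A rough sketch of the intended ordering, using only entry points that
appear in this patch (the example_* wrappers are hypothetical and this is
an illustration of the intent, not the literal driver callpaths):

static void example_suspend_flow(struct drm_i915_private *i915)
{
	/* Switch to the kernel context and wait for the GPU to go idle. */
	i915_gem_suspend(i915);

	/* Final reset before powering down; the GuC goes down with it. */
	i915_gem_suspend_late(i915);
}

static void example_resume_flow(struct drm_i915_private *i915)
{
	/* D3 cannot be trusted, so the HW is brought up from scratch ... */
	i915_gem_sanitize(i915);

	/*
	 * ... and resume then looks like a fresh module load: reload the
	 * GuC fw and restart the communication channels.
	 */
	i915_gem_resume(i915);
}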
Daniele Ceraolo Spurio Feb. 22, 2019, 12:29 a.m. UTC | #7
On 2/21/19 3:25 PM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-02-21 22:53:41)
>>
>>
>> On 2/21/19 1:42 PM, Chris Wilson wrote:
>>> Quoting Daniele Ceraolo Spurio (2019-02-21 21:31:45)
>>>>
>>>>
>>>> On 2/21/19 1:17 PM, Chris Wilson wrote:
>>>>> Quoting Daniele Ceraolo Spurio (2019-02-21 19:48:01)
>>>>>>
>>>>>> <snip>
>>>>>>
>>>>>>> @@ -4481,19 +4471,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>>>>>>>           * state. Fortunately, the kernel_context is disposable and we do
>>>>>>>           * not rely on its state.
>>>>>>>           */
>>>>>>> -     if (!i915_terminally_wedged(&i915->gpu_error)) {
>>>>>>> -             ret = i915_gem_switch_to_kernel_context(i915);
>>>>>>> -             if (ret)
>>>>>>> -                     goto err_unlock;
>>>>>>> -
>>>>>>> -             ret = i915_gem_wait_for_idle(i915,
>>>>>>> -                                          I915_WAIT_INTERRUPTIBLE |
>>>>>>> -                                          I915_WAIT_LOCKED |
>>>>>>> -                                          I915_WAIT_FOR_IDLE_BOOST,
>>>>>>> -                                          HZ / 5);
>>>>>>> -             if (ret == -EINTR)
>>>>>>> -                     goto err_unlock;
>>>>>>> -
>>>>>>> +     if (!switch_to_kernel_context_sync(i915)) {
>>>>>>>                  /* Forcibly cancel outstanding work and leave the gpu quiet. */
>>>>>>>                  i915_gem_set_wedged(i915);
>>>>>>>          }
>>>>>>
>>>>>> GuC-related question: what's your expectation here in regards to GuC
>>>>>> status? The current i915 flow expect either uc_reset_prepare() or
>>>>>> uc_suspend() to be called to clean up the guc status, but we're calling
>>>>>> neither of them here if the switch is successful. Do you expect the
>>>>>> resume code to always blank out the GuC status before a reload?
>>>>>
>>>>> (A few patches later on I propose that we always just do a reset+wedge
>>>>> on suspend in lieu of hangcheck.)
>>>>>
>>>>> On resume, we have to bring the HW up from scratch and do another reset
>>>>> in the process. Some platforms have been known to survive the trips to
>>>>> PCI_D3 (someone is lying!) and so we _have_ to do a reset to be sure we
>>>>> clear the HW state. I expect we would need to force a reset on resume
>>>>> even for the guc, to be sure we cover all cases such as kexec.
>>>>> -Chris
>>>>>
>>>> More than about the HW state, my question here was about the SW
>>>> tracking. At which point do we go and stop guc communication and mark
>>>> guc as not loaded/accessible? e.g. we need to disable and re-enable CT
>>>> buffers before GuC is reset/suspended to make sure the shared memory
>>>> area is cleaned correctly (we currently avoid memsetting all of it on
>>>> reload since it is quite big). Also, communication with GuC is going to
>>>> increase going forward, so we'll need to make sure we accurately track
>>>> its state and do all the relevant cleanups.
>>>
>>> Across suspend/resume, we issue a couple of resets and scrub/sanitize our
>>> state tracking. By the time we load the fw again, both the fw and our
>>> state should be starting from scratch.
>>>
>>> That all seems unavoidable, so I am not understanding the essence of
>>> your question.
>>> -Chris
>>>
>>
>> We're not doing the state scrubbing for guc in all paths at the moment.
>> There is logic in gem_suspend_late(), but that doesn't seem to be called
>> on all paths; e.g. it isn't when we run
>> igt@gem_exec_suspend@basic-s4-devices
> 
> Yup, the dummy hibernate code throws a few surprises, and why
> i915_gem_sanitize is so fiddly to get right between that and
> gem_eio/suspend.
> 
>> and that's why Suja's patch moved
>> the disabling of communication from uc_sanitize to uc_suspend.
> 
> That should also help as previously it tried to talk to the guc after we
> reset it.

But it only helps if we do call uc_suspend ;). I'm wondering if it ends up 
being better to call it from both places.

> 
>> The guc
>> resume code also doesn't currently clean everything as some of the
>> structures (including stuff we allocate for guc usage) are carried over.
>> We can either add something more in the cleanup path or go and rework
>> the resume to blank everything (which would be time consuming since
>> there is tens of MBs involved), but before putting down any code one way
>> or another I wanted to understand what the expectation is.
> 
> I may be naive, but my expectations is that we just have to reset the
> comm ringbuffer pointers. We shouldn't need to hand the guc pristine
> pages, it will zero on allocate when its needs to, surely? We do have to
> rebuild the set of clients everytime we load the guc, so that can't be
> the issue (as that has to be done on resume, device reset etc today),
> although that should only have to be the pinned clients?

GuC doesn't clean up some of the state stored in the memory we allocate 
for its use. In the specific example of the CT buffers, the registration 
is not automatically cleaned by GuC; it is only cleaned when the 
disable_communication H2G is issued or if we just memset the guc memory. 
This is to allow re-use of the same buffers across resets without having 
to issue an H2G to re-enable them. A similar approach is taken for other 
info (e.g. the lrc info required for gen11+), again to allow the host to 
seamlessly restart after a reset or suspend/resume. We always need to 
recreate the clients because the doorbells are HW state and thus they 
can get reset with the guc; the firmware also saves db status in the 
WOPCM rather than in the shared memory for speed, so that does get 
cleaned on reload.
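
(Sketched as hypothetical code -- the example_* names below are
placeholders, not the real intel_uc/intel_guc_ct entry points -- the
ordering constraint described above is roughly:)

static void example_guc_quiesce(struct drm_i915_private *i915)
{
	/*
	 * Send the disable-communication H2G first; this is what clears
	 * the CT buffer registration from the shared memory area, so the
	 * whole (tens of MB) allocation need not be memset on reload.
	 */
	example_guc_disable_communication(i915);

	/* Update the SW tracking: GuC is no longer loaded/accessible. */
	example_guc_mark_unloaded(i915);

	/* Only now reset or suspend the GuC itself. */
	example_guc_reset(i915);
}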

In the current gen11 guc code (which hopefully will hit the ML soon) we 
assumed that uc_suspend would be called on all suspend paths to make 
sure the state in the shared structures was clean, but if it isn't 
then we'll have to do some tweaks to cope. BTW, we need to add 
uc_reset_prepare() to __i915_gem_set_wedged as well.

Daniele

> 
> We have to restart the comm channels on loading the guc, so what's
> changing?
> 
> On suspend, hit the device reset & kill guc. On resume, load the guc fw,
> restart comm. After fiddling about making sure we are in the right
> callpaths, the intent is that resume just looks like a fresh module
> load (so we only have to reason about init sequence [nearly] once).
> -Chris
>

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f5a3558e00fd..36da8ab1e7ce 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -712,8 +712,7 @@  static int i915_load_modeset_init(struct drm_device *dev)
 	return 0;
 
 cleanup_gem:
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 	i915_gem_fini(dev_priv);
 cleanup_modeset:
 	intel_modeset_cleanup(dev);
@@ -1784,8 +1783,7 @@  void i915_driver_unload(struct drm_device *dev)
 	/* Flush any external code that still may be under the RCU lock */
 	synchronize_rcu();
 
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 
 	drm_atomic_helper_shutdown(dev);
 
@@ -1893,7 +1891,6 @@  static bool suspend_to_idle(struct drm_i915_private *dev_priv)
 static int i915_drm_prepare(struct drm_device *dev)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
-	int err;
 
 	/*
 	 * NB intel_display_suspend() may issue new requests after we've
@@ -1901,12 +1898,9 @@  static int i915_drm_prepare(struct drm_device *dev)
 	 * split out that work and pull it forward so that after point,
 	 * the GPU is not woken again.
 	 */
-	err = i915_gem_suspend(i915);
-	if (err)
-		dev_err(&i915->drm.pdev->dev,
-			"GEM idle failed, suspend/resume might fail\n");
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static int i915_drm_suspend(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4d697b1002af..8a72dad9471f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3031,7 +3031,7 @@  void i915_gem_fini(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
-int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
+void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 89b2d3ac26ce..43bc26d5807a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,13 +2930,6 @@  static void __sleep_rcu(struct rcu_head *rcu)
 	}
 }
 
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -2945,7 +2938,8 @@  static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return;
 
-	GEM_BUG_ON(i915->gt.active_requests);
+	i915_retire_requests(i915);
+
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context !=
@@ -2953,78 +2947,76 @@  static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }
 
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	if (i915_gem_switch_to_kernel_context(i915))
+		return false;
+
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   HZ / 10))
+		return false;
+
+	assert_kernel_context_is_current(i915);
+	return true;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
+	typeof(i915->gt) *gt = &i915->gt;
 	unsigned int epoch = I915_EPOCH_INVALID;
 	bool rearm_hangcheck;
 
-	if (!READ_ONCE(dev_priv->gt.awake))
+	if (!READ_ONCE(gt->awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_requests))
+	if (READ_ONCE(gt->active_requests))
 		return;
 
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. When we are idling on the kernel_context,
-	 * no more new requests (with a context switch) are emitted and we
-	 * can finally rest. A consequence is that the idle work handler is
-	 * always called at least twice before idling (and if the system is
-	 * idle that implies a round trip through the retire worker).
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_switch_to_kernel_context(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
-		  READ_ONCE(dev_priv->gt.active_requests));
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted. As we don't trust the hardware, we
-	 * continue on if the wait times out. This is necessary to allow
-	 * the machine to suspend even if the hardware dies, and we will
-	 * try to recover in resume (after depriving the hardware of power,
-	 * it may be in a better mmod).
-	 */
-	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
-		   intel_engines_are_idle(dev_priv),
-		   I915_IDLE_ENGINES_TIMEOUT * 1000,
-		   10, 500);
-
 	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+	if (!mutex_trylock(&i915->drm.struct_mutex)) {
 		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
+		mod_delayed_work(i915->wq,
+				 &gt->idle_work,
 				 msecs_to_jiffies(50));
 		goto out_rearm;
 	}
 
 	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. Should anything unfortunate happen
+	 * while we are idle (such as the GPU being power cycled), no users
+	 * will be harmed.
 	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
-
-	epoch = __i915_gem_park(dev_priv);
+	if (!gt->active_requests && !work_pending(&gt->idle_work.work)) {
+		++gt->active_requests; /* don't requeue idle */
+
+		if (!switch_to_kernel_context_sync(i915)) {
+			dev_err(i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+		}
+		i915_retire_requests(i915);
 
-	assert_kernel_context_is_current(dev_priv);
+		if (!--gt->active_requests) {
+			epoch = __i915_gem_park(i915);
+			rearm_hangcheck = false;
+		}
+	}
 
-	rearm_hangcheck = false;
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out_rearm:
 	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
+		GEM_BUG_ON(!gt->awake);
+		i915_queue_hangcheck(i915);
 	}
 
 	/*
@@ -3035,11 +3027,11 @@  i915_gem_idle_work_handler(struct work_struct *work)
 	 * period, and then queue a task (that will run last on the wq) to
 	 * shrink and re-optimize the caches.
 	 */
-	if (same_epoch(dev_priv, epoch)) {
+	if (same_epoch(i915, epoch)) {
 		struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
 		if (s) {
 			init_rcu_head(&s->rcu);
-			s->i915 = dev_priv;
+			s->i915 = i915;
 			s->epoch = epoch;
 			call_rcu(&s->rcu, __sleep_rcu);
 		}
@@ -3249,7 +3241,6 @@  int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			return err;
 
 		i915_retire_requests(i915);
-		GEM_BUG_ON(i915->gt.active_requests);
 	}
 
 	return 0;
@@ -4458,10 +4449,9 @@  void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-int i915_gem_suspend(struct drm_i915_private *i915)
+void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	int ret;
 
 	GEM_TRACE("\n");
 
@@ -4481,19 +4471,7 @@  int i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!i915_terminally_wedged(&i915->gpu_error)) {
-		ret = i915_gem_switch_to_kernel_context(i915);
-		if (ret)
-			goto err_unlock;
-
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED |
-					     I915_WAIT_FOR_IDLE_BOOST,
-					     HZ / 5);
-		if (ret == -EINTR)
-			goto err_unlock;
-
+	if (!switch_to_kernel_context_sync(i915)) {
 		/* Forcibly cancel outstanding work and leave the gpu quiet. */
 		i915_gem_set_wedged(i915);
 	}
@@ -4517,12 +4495,6 @@  int i915_gem_suspend(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->gt.awake);
 
 	intel_runtime_pm_put(i915, wakeref);
-	return 0;
-
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-	return ret;
 }
 
 void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -4788,18 +4760,11 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
-		i915_gem_set_wedged(i915);
+	if (!switch_to_kernel_context_sync(i915)) {
 		err = -EIO; /* Caller will declare us wedged */
 		goto err_active;
 	}
 
-	assert_kernel_context_is_current(i915);
-
 	/*
 	 * Immediately park the GPU so that we enable powersaving and
 	 * treat it as idle. The next time we issue a request, we will
@@ -5043,7 +5008,7 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	WARN_ON(i915_gem_suspend(dev_priv));
+	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 280813a4bf82..1bdb067845f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -745,6 +745,10 @@  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->kernel_context);
 
+	/* Inoperable, so presume the GPU is safely pointing into the void! */
+	if (i915_terminally_wedged(&i915->gpu_error))
+		return 0;
+
 	i915_retire_requests(i915);
 
 	for_each_engine(engine, i915, id) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index e77b7ed449ae..50bb7bbd26d3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -84,14 +84,9 @@  static void simulate_hibernate(struct drm_i915_private *i915)
 
 static int pm_prepare(struct drm_i915_private *i915)
 {
-	int err = 0;
-
-	if (i915_gem_suspend(i915)) {
-		pr_err("i915_gem_suspend failed\n");
-		err = -EINVAL;
-	}
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static void pm_suspend(struct drm_i915_private *i915)