Message ID | 20230524090521.596399-3-luciano.coelho@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: implement internal workqueues | expand |
On 24/05/2023 10:05, Luca Coelho wrote: > In order to avoid flush_scheduled_work() usage, add a dedicated > workqueue in the drm_i915_private structure. In this way, we don't > need to use the system queue anymore. > > This change is mostly mechanical and based on Tetsuo's original > patch[1]. > > Link: https://patchwork.freedesktop.org/series/114608/ [1] > Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Jani Nikula <jani.nikula@intel.com> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > Signed-off-by: Luca Coelho <luciano.coelho@intel.com> > --- > drivers/gpu/drm/i915/display/intel_display.c | 5 ++-- > .../drm/i915/display/intel_display_driver.c | 2 +- > drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- > drivers/gpu/drm/i915/display/intel_dp.c | 2 +- > .../drm/i915/display/intel_dp_link_training.c | 3 ++- > drivers/gpu/drm/i915/display/intel_drrs.c | 4 +++- > drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- > drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- > drivers/gpu/drm/i915/display/intel_hdcp.c | 23 +++++++++++-------- > drivers/gpu/drm/i915/display/intel_hotplug.c | 18 ++++++++++----- > drivers/gpu/drm/i915/display/intel_opregion.c | 3 ++- > drivers/gpu/drm/i915/display/intel_pps.c | 4 +++- > drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++--- > .../drm/i915/gt/intel_execlists_submission.c | 5 ++-- > .../gpu/drm/i915/gt/intel_gt_buffer_pool.c | 10 ++++---- > drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 10 ++++---- > drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- > drivers/gpu/drm/i915/gt/intel_rps.c | 20 ++++++++-------- > drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- > drivers/gpu/drm/i915/i915_driver.c | 11 +++++++++ > drivers/gpu/drm/i915/i915_drv.h | 9 +++++++- > drivers/gpu/drm/i915/i915_request.c | 2 +- > drivers/gpu/drm/i915/intel_wakeref.c | 2 +- > 24 files changed, 99 insertions(+), 55 deletions(-) I'll take a look at the gt/ parts only since display experts need to okay the point Daniel raise anyway. 8< > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index 750326434677..2ebd937f3b4c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine) > > static void execlists_capture(struct intel_engine_cs *engine) > { > + struct drm_i915_private *i915 = engine->i915; > struct execlists_capture *cap; > > if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) > @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine) > goto err_rq; > > INIT_WORK(&cap->work, execlists_capture_work); > - schedule_work(&cap->work); > + queue_work(i915->unordered_wq, &cap->work); > return; > > err_rq: > @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref) > * lock, we can delegate the free of the engine to an RCU worker. > */ > INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); > - queue_rcu_work(system_wq, &ve->rcu); > + queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu); > } > > static void virtual_engine_initial_hint(struct virtual_engine *ve) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > index cadfd85785b1..86b5a9ba323d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk) > { > struct intel_gt_buffer_pool *pool = > container_of(wrk, typeof(*pool), work.work); > + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); Okay. Or alternatively, pool = >->buffer_pool, might read simpler... > > if (pool_free_older_than(pool, HZ)) > - schedule_delayed_work(&pool->work, > - round_jiffies_up_relative(HZ)); > + queue_delayed_work(gt->i915->unordered_wq, &pool->work, > + round_jiffies_up_relative(HZ)); > } > > static void pool_retire(struct i915_active *ref) > @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref) > struct intel_gt_buffer_pool_node *node = > container_of(ref, typeof(*node), active); > struct intel_gt_buffer_pool *pool = node->pool; > + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't just gain a gt backpointer? That would decouple things more instead of tying the implementation with intel_gt implicitly. Not a strong direction though. > struct list_head *list = bucket_for_size(pool, node->obj->base.size); > unsigned long flags; > > @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref) > WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ > spin_unlock_irqrestore(&pool->lock, flags); > > - schedule_delayed_work(&pool->work, > - round_jiffies_up_relative(HZ)); > + queue_delayed_work(gt->i915->unordered_wq, &pool->work, > + round_jiffies_up_relative(HZ)); > } > > void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c > index 8f888d36f16d..62fd00c9e519 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c > @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) > if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) > gt->i915->l3_parity.which_slice |= 1 << 0; > > - schedule_work(>->i915->l3_parity.error_work); > + queue_work(gt->i915->unordered_wq, >->i915->l3_parity.error_work); > } > > void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > index 1dfd01668c79..d1a382dfaa1d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > GEM_BUG_ON(intel_engine_is_virtual(engine)); > > if (add_retire(engine, tl)) > - schedule_work(&engine->retire_work); > + queue_work(engine->i915->unordered_wq, &engine->retire_work); > } > > void intel_engine_init_retire(struct intel_engine_cs *engine) > @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work) > struct intel_gt *gt = > container_of(work, typeof(*gt), requests.retire_work.work); > > - schedule_delayed_work(>->requests.retire_work, > - round_jiffies_up_relative(HZ)); > + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, > + round_jiffies_up_relative(HZ)); > intel_gt_retire_requests(gt); > } > > @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt) > > void intel_gt_unpark_requests(struct intel_gt *gt) > { > - schedule_delayed_work(>->requests.retire_work, > - round_jiffies_up_relative(HZ)); > + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, > + round_jiffies_up_relative(HZ)); > } > > void intel_gt_fini_requests(struct intel_gt *gt) > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index 195ff72d7a14..e2152f75ba2e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w, > w->name = name; > > INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); > - schedule_delayed_work(&w->work, timeout); > + queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout); > } > > void __intel_fini_wedge(struct intel_wedge_me *w) > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c > index e68a99205599..e92e626d4994 100644 > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) > static void rps_timer(struct timer_list *t) > { > struct intel_rps *rps = from_timer(rps, t, timer); > + struct intel_gt *gt = rps_to_gt(rps); > struct intel_engine_cs *engine; > ktime_t dt, last, timestamp; > enum intel_engine_id id; > s64 max_busy[3] = {}; > > timestamp = 0; > - for_each_engine(engine, rps_to_gt(rps), id) { > + for_each_engine(engine, gt, id) { > s64 busy; > int i; > > @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t) > > busy += div_u64(max_busy[i], 1 << i); > } > - GT_TRACE(rps_to_gt(rps), > + GT_TRACE(gt, > "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", > busy, (int)div64_u64(100 * busy, dt), > max_busy[0], max_busy[1], max_busy[2], > @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t) > rps->cur_freq < rps->max_freq_softlimit) { > rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; > rps->pm_interval = 1; > - schedule_work(&rps->work); > + queue_work(gt->i915->unordered_wq, &rps->work); > } else if (100 * busy < rps->power.down_threshold * dt && > rps->cur_freq > rps->min_freq_softlimit) { > rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; > rps->pm_interval = 1; > - schedule_work(&rps->work); > + queue_work(gt->i915->unordered_wq, &rps->work); > } else { > rps->last_adj = 0; > } > @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val) > } > mutex_unlock(&rps->lock); > if (boost) > - schedule_work(&rps->work); > + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); > > return 0; > } > @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq) > if (!atomic_fetch_inc(&slpc->num_waiters)) { > GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", > rq->fence.context, rq->fence.seqno); > - schedule_work(&slpc->boost_work); > + queue_work(rps_to_gt(rps)->i915->unordered_wq, > + &slpc->boost_work); > } > > return; > @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq) > rq->fence.context, rq->fence.seqno); > > if (READ_ONCE(rps->cur_freq) < rps->boost_freq) > - schedule_work(&rps->work); > + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); > > WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ > } > @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > gen6_gt_pm_mask_irq(gt, events); > > rps->pm_iir |= events; > - schedule_work(&rps->work); > + queue_work(gt->i915->unordered_wq, &rps->work); > } > > void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > gen6_gt_pm_mask_irq(gt, events); > rps->pm_iir |= events; > > - schedule_work(&rps->work); > + queue_work(gt->i915->unordered_wq, &rps->work); > spin_unlock(gt->irq_lock); > } > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > index 542ce6d2de19..78cdfc6f315f 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt) > > /* Boost gpufreq to max [waitboost] and keep it fixed */ > atomic_inc(>->rps.num_waiters); > - schedule_work(>->rps.work); > + queue_work(gt->i915->unordered_wq, >->rps.work); > flush_work(>->rps.work); > } > > diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c > index 522733a89946..88808aa85b26 100644 > --- a/drivers/gpu/drm/i915/i915_driver.c > +++ b/drivers/gpu/drm/i915/i915_driver.c > @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) > if (dev_priv->display.hotplug.dp_wq == NULL) > goto out_free_wq; > > + /* > + * The unordered i915 workqueue should be used for all work > + * scheduling that do not require running in order. > + */ Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know when to use each that well so not fair on you to figure it out. > + dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0); > + if (dev_priv->unordered_wq == NULL) > + goto out_free_dp_wq; > + > return 0; > > +out_free_dp_wq: > + destroy_workqueue(dev_priv->unordered_wq); Wrong wq. > out_free_wq: > destroy_workqueue(dev_priv->wq); > out_err: > @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) > > static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) > { > + destroy_workqueue(dev_priv->unordered_wq); > destroy_workqueue(dev_priv->display.hotplug.dp_wq); > destroy_workqueue(dev_priv->wq); > } > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 14c5338c96a6..8f2665e8afb5 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -259,6 +259,14 @@ struct drm_i915_private { > */ > struct workqueue_struct *wq; > > + /** > + * unordered_wq - internal workqueue for unordered work > + * > + * This workqueue should be used for all unordered work > + * scheduling within i915. Proably add something like ", which used to be scheduled on the system_wq before moving to a driver instance due deprecation of flush_scheduled_work()." That way we leave some note to the reader. > + */ > + struct workqueue_struct *unordered_wq; > + > /* pm private clock gating functions */ > const struct drm_i915_clock_gating_funcs *clock_gating_funcs; > > @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \ > GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) > - Tidy if you can please. > #endif > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 630a732aaecc..894068bb37b6 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) > > if (!i915_request_completed(rq)) { > if (llist_add(&rq->watchdog.link, >->watchdog.list)) > - schedule_work(>->watchdog.work); > + queue_work(gt->i915->unordered_wq, >->watchdog.work); > } else { > i915_request_put(rq); > } > diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c > index 40aafe676017..497ea21a347e 100644 > --- a/drivers/gpu/drm/i915/intel_wakeref.c > +++ b/drivers/gpu/drm/i915/intel_wakeref.c > @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) > > /* Assume we are not in process context and so cannot sleep. */ > if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { > - mod_delayed_work(system_wq, &wf->work, > + mod_delayed_work(wf->i915->wq, &wf->work, > FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags)); > return; > } Pending the one or two details the non-display parts look good to me. Regards, Tvrtko
On 24/05/2023 12:00, Tvrtko Ursulin wrote: > > On 24/05/2023 10:05, Luca Coelho wrote: 8< >> if (pool_free_older_than(pool, HZ)) >> - schedule_delayed_work(&pool->work, >> - round_jiffies_up_relative(HZ)); >> + queue_delayed_work(gt->i915->unordered_wq, &pool->work, >> + round_jiffies_up_relative(HZ)); >> } >> static void pool_retire(struct i915_active *ref) >> @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref) >> struct intel_gt_buffer_pool_node *node = >> container_of(ref, typeof(*node), active); >> struct intel_gt_buffer_pool *pool = node->pool; >> + struct intel_gt *gt = container_of(pool, struct intel_gt, >> buffer_pool); > > ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't > just gain a gt backpointer? That would decouple things more instead of > tying the implementation with intel_gt implicitly. Not a strong > direction though. Never mind on this point, code already assumes this relationships for instance in node_create(). Regards, Tvrtko
On Wed, 2023-05-24 at 12:00 +0100, Tvrtko Ursulin wrote: > On 24/05/2023 10:05, Luca Coelho wrote: > > In order to avoid flush_scheduled_work() usage, add a dedicated > > workqueue in the drm_i915_private structure. In this way, we don't > > need to use the system queue anymore. > > > > This change is mostly mechanical and based on Tetsuo's original > > patch[1]. > > > > Link: https://patchwork.freedesktop.org/series/114608/ [1] > > Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Cc: Jani Nikula <jani.nikula@intel.com> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > > Signed-off-by: Luca Coelho <luciano.coelho@intel.com> > > --- > > drivers/gpu/drm/i915/display/intel_display.c | 5 ++-- > > .../drm/i915/display/intel_display_driver.c | 2 +- > > drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- > > drivers/gpu/drm/i915/display/intel_dp.c | 2 +- > > .../drm/i915/display/intel_dp_link_training.c | 3 ++- > > drivers/gpu/drm/i915/display/intel_drrs.c | 4 +++- > > drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- > > drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- > > drivers/gpu/drm/i915/display/intel_hdcp.c | 23 +++++++++++-------- > > drivers/gpu/drm/i915/display/intel_hotplug.c | 18 ++++++++++----- > > drivers/gpu/drm/i915/display/intel_opregion.c | 3 ++- > > drivers/gpu/drm/i915/display/intel_pps.c | 4 +++- > > drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++--- > > .../drm/i915/gt/intel_execlists_submission.c | 5 ++-- > > .../gpu/drm/i915/gt/intel_gt_buffer_pool.c | 10 ++++---- > > drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- > > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 10 ++++---- > > drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- > > drivers/gpu/drm/i915/gt/intel_rps.c | 20 ++++++++-------- > > drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- > > drivers/gpu/drm/i915/i915_driver.c | 11 +++++++++ > > drivers/gpu/drm/i915/i915_drv.h | 9 +++++++- > > drivers/gpu/drm/i915/i915_request.c | 2 +- > > drivers/gpu/drm/i915/intel_wakeref.c | 2 +- > > 24 files changed, 99 insertions(+), 55 deletions(-) > > I'll take a look at the gt/ parts only since display experts need to > okay the point Daniel raise anyway. Thanks! > 8< > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index 750326434677..2ebd937f3b4c 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine) > > > > static void execlists_capture(struct intel_engine_cs *engine) > > { > > + struct drm_i915_private *i915 = engine->i915; > > struct execlists_capture *cap; > > > > if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) > > @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine) > > goto err_rq; > > > > INIT_WORK(&cap->work, execlists_capture_work); > > - schedule_work(&cap->work); > > + queue_work(i915->unordered_wq, &cap->work); > > return; > > > > err_rq: > > @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref) > > * lock, we can delegate the free of the engine to an RCU worker. > > */ > > INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); > > - queue_rcu_work(system_wq, &ve->rcu); > > + queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu); > > } > > > > static void virtual_engine_initial_hint(struct virtual_engine *ve) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > > index cadfd85785b1..86b5a9ba323d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c > > @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk) > > { > > struct intel_gt_buffer_pool *pool = > > container_of(wrk, typeof(*pool), work.work); > > + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); > > Okay. Or alternatively, pool = >->buffer_pool, might read simpler... Sorry, I don't follow. wrk is inside intel_gt_buffer_pool, so we can derive pool from work. And then we know that pool is inside intel_gt, so we derive gt from that. How would I get gt first and derive pool from that? > > > > if (pool_free_older_than(pool, HZ)) > > - schedule_delayed_work(&pool->work, > > - round_jiffies_up_relative(HZ)); > > + queue_delayed_work(gt->i915->unordered_wq, &pool->work, > > + round_jiffies_up_relative(HZ)); > > } > > > > static void pool_retire(struct i915_active *ref) > > @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref) > > struct intel_gt_buffer_pool_node *node = > > container_of(ref, typeof(*node), active); > > struct intel_gt_buffer_pool *pool = node->pool; > > + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); > > ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't > just gain a gt backpointer? That would decouple things more instead of > tying the implementation with intel_gt implicitly. Not a strong > direction though. > > > struct list_head *list = bucket_for_size(pool, node->obj->base.size); > > unsigned long flags; > > > > @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref) > > WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ > > spin_unlock_irqrestore(&pool->lock, flags); > > > > - schedule_delayed_work(&pool->work, > > - round_jiffies_up_relative(HZ)); > > + queue_delayed_work(gt->i915->unordered_wq, &pool->work, > > + round_jiffies_up_relative(HZ)); > > } > > > > void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c > > index 8f888d36f16d..62fd00c9e519 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c > > @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) > > if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) > > gt->i915->l3_parity.which_slice |= 1 << 0; > > > > - schedule_work(>->i915->l3_parity.error_work); > > + queue_work(gt->i915->unordered_wq, >->i915->l3_parity.error_work); > > } > > > > void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > index 1dfd01668c79..d1a382dfaa1d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > > GEM_BUG_ON(intel_engine_is_virtual(engine)); > > > > if (add_retire(engine, tl)) > > - schedule_work(&engine->retire_work); > > + queue_work(engine->i915->unordered_wq, &engine->retire_work); > > } > > > > void intel_engine_init_retire(struct intel_engine_cs *engine) > > @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work) > > struct intel_gt *gt = > > container_of(work, typeof(*gt), requests.retire_work.work); > > > > - schedule_delayed_work(>->requests.retire_work, > > - round_jiffies_up_relative(HZ)); > > + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, > > + round_jiffies_up_relative(HZ)); > > intel_gt_retire_requests(gt); > > } > > > > @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt) > > > > void intel_gt_unpark_requests(struct intel_gt *gt) > > { > > - schedule_delayed_work(>->requests.retire_work, > > - round_jiffies_up_relative(HZ)); > > + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, > > + round_jiffies_up_relative(HZ)); > > } > > > > void intel_gt_fini_requests(struct intel_gt *gt) > > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > > index 195ff72d7a14..e2152f75ba2e 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > > @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w, > > w->name = name; > > > > INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); > > - schedule_delayed_work(&w->work, timeout); > > + queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout); > > } > > > > void __intel_fini_wedge(struct intel_wedge_me *w) > > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c > > index e68a99205599..e92e626d4994 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > > @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) > > static void rps_timer(struct timer_list *t) > > { > > struct intel_rps *rps = from_timer(rps, t, timer); > > + struct intel_gt *gt = rps_to_gt(rps); > > struct intel_engine_cs *engine; > > ktime_t dt, last, timestamp; > > enum intel_engine_id id; > > s64 max_busy[3] = {}; > > > > timestamp = 0; > > - for_each_engine(engine, rps_to_gt(rps), id) { > > + for_each_engine(engine, gt, id) { > > s64 busy; > > int i; > > > > @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t) > > > > busy += div_u64(max_busy[i], 1 << i); > > } > > - GT_TRACE(rps_to_gt(rps), > > + GT_TRACE(gt, > > "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", > > busy, (int)div64_u64(100 * busy, dt), > > max_busy[0], max_busy[1], max_busy[2], > > @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t) > > rps->cur_freq < rps->max_freq_softlimit) { > > rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; > > rps->pm_interval = 1; > > - schedule_work(&rps->work); > > + queue_work(gt->i915->unordered_wq, &rps->work); > > } else if (100 * busy < rps->power.down_threshold * dt && > > rps->cur_freq > rps->min_freq_softlimit) { > > rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; > > rps->pm_interval = 1; > > - schedule_work(&rps->work); > > + queue_work(gt->i915->unordered_wq, &rps->work); > > } else { > > rps->last_adj = 0; > > } > > @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val) > > } > > mutex_unlock(&rps->lock); > > if (boost) > > - schedule_work(&rps->work); > > + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); > > > > return 0; > > } > > @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq) > > if (!atomic_fetch_inc(&slpc->num_waiters)) { > > GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", > > rq->fence.context, rq->fence.seqno); > > - schedule_work(&slpc->boost_work); > > + queue_work(rps_to_gt(rps)->i915->unordered_wq, > > + &slpc->boost_work); > > } > > > > return; > > @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq) > > rq->fence.context, rq->fence.seqno); > > > > if (READ_ONCE(rps->cur_freq) < rps->boost_freq) > > - schedule_work(&rps->work); > > + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); > > > > WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ > > } > > @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > > gen6_gt_pm_mask_irq(gt, events); > > > > rps->pm_iir |= events; > > - schedule_work(&rps->work); > > + queue_work(gt->i915->unordered_wq, &rps->work); > > } > > > > void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > > @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) > > gen6_gt_pm_mask_irq(gt, events); > > rps->pm_iir |= events; > > > > - schedule_work(&rps->work); > > + queue_work(gt->i915->unordered_wq, &rps->work); > > spin_unlock(gt->irq_lock); > > } > > > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > > index 542ce6d2de19..78cdfc6f315f 100644 > > --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c > > @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt) > > > > /* Boost gpufreq to max [waitboost] and keep it fixed */ > > atomic_inc(>->rps.num_waiters); > > - schedule_work(>->rps.work); > > + queue_work(gt->i915->unordered_wq, >->rps.work); > > flush_work(>->rps.work); > > } > > > > diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c > > index 522733a89946..88808aa85b26 100644 > > --- a/drivers/gpu/drm/i915/i915_driver.c > > +++ b/drivers/gpu/drm/i915/i915_driver.c > > @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) > > if (dev_priv->display.hotplug.dp_wq == NULL) > > goto out_free_wq; > > > > + /* > > + * The unordered i915 workqueue should be used for all work > > + * scheduling that do not require running in order. > > + */ > > Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know > when to use each that well so not fair on you to figure it out. :D To be honest, I looked at all the existing users and described pretty much what is happening. As far as I could tell, someone started using the system queue a long time ago and, in lack of better alternatives, everyone else who needed a work followed that. So the new queue is indeed a catch all for non-ordered queues at the moment (in pretty much the same way as the system queue was before)... Additionally, I don't know all the current users well, since they are everywhere, and I don't think there has been a specific rule when to use the system queue before. I'm totally open for better descriptions. :) > > + dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0); > > + if (dev_priv->unordered_wq == NULL) > > + goto out_free_dp_wq; > > + > > return 0; > > > > +out_free_dp_wq: > > + destroy_workqueue(dev_priv->unordered_wq); > > Wrong wq. Ouch. Good catch. > > out_free_wq: > > destroy_workqueue(dev_priv->wq); > > out_err: > > @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) > > > > static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) > > { > > + destroy_workqueue(dev_priv->unordered_wq); > > destroy_workqueue(dev_priv->display.hotplug.dp_wq); > > destroy_workqueue(dev_priv->wq); > > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 14c5338c96a6..8f2665e8afb5 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -259,6 +259,14 @@ struct drm_i915_private { > > */ > > struct workqueue_struct *wq; > > > > + /** > > + * unordered_wq - internal workqueue for unordered work > > + * > > + * This workqueue should be used for all unordered work > > + * scheduling within i915. > > Proably add something like ", which used to be scheduled on the > system_wq before moving to a driver instance due deprecation of > flush_scheduled_work()." I did start writing something like that, but then I thought that, in the code, writing about the history of the implementation doesn't add much. We have the git log for that. But, obviously, I can add that if you want. > That way we leave some note to the reader. > > > + */ > > + struct workqueue_struct *unordered_wq; > > + > > /* pm private clock gating functions */ > > const struct drm_i915_clock_gating_funcs *clock_gating_funcs; > > > > @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > > > #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \ > > GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) > > - > > Tidy if you can please. Oops! > > #endif > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > > index 630a732aaecc..894068bb37b6 100644 > > --- a/drivers/gpu/drm/i915/i915_request.c > > +++ b/drivers/gpu/drm/i915/i915_request.c > > @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) > > > > if (!i915_request_completed(rq)) { > > if (llist_add(&rq->watchdog.link, >->watchdog.list)) > > - schedule_work(>->watchdog.work); > > + queue_work(gt->i915->unordered_wq, >->watchdog.work); > > } else { > > i915_request_put(rq); > > } > > diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c > > index 40aafe676017..497ea21a347e 100644 > > --- a/drivers/gpu/drm/i915/intel_wakeref.c > > +++ b/drivers/gpu/drm/i915/intel_wakeref.c > > @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) > > > > /* Assume we are not in process context and so cannot sleep. */ > > if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { > > - mod_delayed_work(system_wq, &wf->work, > > + mod_delayed_work(wf->i915->wq, &wf->work, > > FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags)); > > return; > > } > > Pending the one or two details the non-display parts look good to me. Great, thanks! I'll send a new version addressing your comments and wait for display parts review. -- Cheers, Luca.
On Wed, 24 May 2023, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote: > On 24/05/2023 10:05, Luca Coelho wrote: >> In order to avoid flush_scheduled_work() usage, add a dedicated >> workqueue in the drm_i915_private structure. In this way, we don't >> need to use the system queue anymore. >> >> This change is mostly mechanical and based on Tetsuo's original >> patch[1]. >> >> Link: https://patchwork.freedesktop.org/series/114608/ [1] >> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Jani Nikula <jani.nikula@intel.com> >> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> >> Signed-off-by: Luca Coelho <luciano.coelho@intel.com> >> --- >> drivers/gpu/drm/i915/display/intel_display.c | 5 ++-- >> .../drm/i915/display/intel_display_driver.c | 2 +- >> drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- >> drivers/gpu/drm/i915/display/intel_dp.c | 2 +- >> .../drm/i915/display/intel_dp_link_training.c | 3 ++- >> drivers/gpu/drm/i915/display/intel_drrs.c | 4 +++- >> drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- >> drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- >> drivers/gpu/drm/i915/display/intel_hdcp.c | 23 +++++++++++-------- >> drivers/gpu/drm/i915/display/intel_hotplug.c | 18 ++++++++++----- >> drivers/gpu/drm/i915/display/intel_opregion.c | 3 ++- >> drivers/gpu/drm/i915/display/intel_pps.c | 4 +++- >> drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++--- >> .../drm/i915/gt/intel_execlists_submission.c | 5 ++-- >> .../gpu/drm/i915/gt/intel_gt_buffer_pool.c | 10 ++++---- >> drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- >> drivers/gpu/drm/i915/gt/intel_gt_requests.c | 10 ++++---- >> drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- >> drivers/gpu/drm/i915/gt/intel_rps.c | 20 ++++++++-------- >> drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- >> drivers/gpu/drm/i915/i915_driver.c | 11 +++++++++ >> drivers/gpu/drm/i915/i915_drv.h | 9 +++++++- >> drivers/gpu/drm/i915/i915_request.c | 2 +- >> drivers/gpu/drm/i915/intel_wakeref.c | 2 +- >> 24 files changed, 99 insertions(+), 55 deletions(-) > > I'll take a look at the gt/ parts only since display experts need to > okay the point Daniel raise anyway. There's a bunch of lockdep failures in BAT. Are they possible related to switching to unordered wq? BR, Jani > > 8< >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> index 750326434677..2ebd937f3b4c 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine) >> >> static void execlists_capture(struct intel_engine_cs *engine) >> { >> + struct drm_i915_private *i915 = engine->i915; >> struct execlists_capture *cap; >> >> if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) >> @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine) >> goto err_rq; >> >> INIT_WORK(&cap->work, execlists_capture_work); >> - schedule_work(&cap->work); >> + queue_work(i915->unordered_wq, &cap->work); >> return; >> >> err_rq: >> @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref) >> * lock, we can delegate the free of the engine to an RCU worker. >> */ >> INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); >> - queue_rcu_work(system_wq, &ve->rcu); >> + queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu); >> } >> >> static void virtual_engine_initial_hint(struct virtual_engine *ve) >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c >> index cadfd85785b1..86b5a9ba323d 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c >> @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk) >> { >> struct intel_gt_buffer_pool *pool = >> container_of(wrk, typeof(*pool), work.work); >> + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); > > Okay. Or alternatively, pool = >->buffer_pool, might read simpler... > >> >> if (pool_free_older_than(pool, HZ)) >> - schedule_delayed_work(&pool->work, >> - round_jiffies_up_relative(HZ)); >> + queue_delayed_work(gt->i915->unordered_wq, &pool->work, >> + round_jiffies_up_relative(HZ)); >> } >> >> static void pool_retire(struct i915_active *ref) >> @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref) >> struct intel_gt_buffer_pool_node *node = >> container_of(ref, typeof(*node), active); >> struct intel_gt_buffer_pool *pool = node->pool; >> + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); > > ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't > just gain a gt backpointer? That would decouple things more instead of > tying the implementation with intel_gt implicitly. Not a strong > direction though. > >> struct list_head *list = bucket_for_size(pool, node->obj->base.size); >> unsigned long flags; >> >> @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref) >> WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ >> spin_unlock_irqrestore(&pool->lock, flags); >> >> - schedule_delayed_work(&pool->work, >> - round_jiffies_up_relative(HZ)); >> + queue_delayed_work(gt->i915->unordered_wq, &pool->work, >> + round_jiffies_up_relative(HZ)); >> } >> >> void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node) >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c >> index 8f888d36f16d..62fd00c9e519 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c >> @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) >> if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) >> gt->i915->l3_parity.which_slice |= 1 << 0; >> >> - schedule_work(>->i915->l3_parity.error_work); >> + queue_work(gt->i915->unordered_wq, >->i915->l3_parity.error_work); >> } >> >> void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c >> index 1dfd01668c79..d1a382dfaa1d 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c >> @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, >> GEM_BUG_ON(intel_engine_is_virtual(engine)); >> >> if (add_retire(engine, tl)) >> - schedule_work(&engine->retire_work); >> + queue_work(engine->i915->unordered_wq, &engine->retire_work); >> } >> >> void intel_engine_init_retire(struct intel_engine_cs *engine) >> @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work) >> struct intel_gt *gt = >> container_of(work, typeof(*gt), requests.retire_work.work); >> >> - schedule_delayed_work(>->requests.retire_work, >> - round_jiffies_up_relative(HZ)); >> + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, >> + round_jiffies_up_relative(HZ)); >> intel_gt_retire_requests(gt); >> } >> >> @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt) >> >> void intel_gt_unpark_requests(struct intel_gt *gt) >> { >> - schedule_delayed_work(>->requests.retire_work, >> - round_jiffies_up_relative(HZ)); >> + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, >> + round_jiffies_up_relative(HZ)); >> } >> >> void intel_gt_fini_requests(struct intel_gt *gt) >> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c >> index 195ff72d7a14..e2152f75ba2e 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_reset.c >> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c >> @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w, >> w->name = name; >> >> INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); >> - schedule_delayed_work(&w->work, timeout); >> + queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout); >> } >> >> void __intel_fini_wedge(struct intel_wedge_me *w) >> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c >> index e68a99205599..e92e626d4994 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_rps.c >> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c >> @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) >> static void rps_timer(struct timer_list *t) >> { >> struct intel_rps *rps = from_timer(rps, t, timer); >> + struct intel_gt *gt = rps_to_gt(rps); >> struct intel_engine_cs *engine; >> ktime_t dt, last, timestamp; >> enum intel_engine_id id; >> s64 max_busy[3] = {}; >> >> timestamp = 0; >> - for_each_engine(engine, rps_to_gt(rps), id) { >> + for_each_engine(engine, gt, id) { >> s64 busy; >> int i; >> >> @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t) >> >> busy += div_u64(max_busy[i], 1 << i); >> } >> - GT_TRACE(rps_to_gt(rps), >> + GT_TRACE(gt, >> "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", >> busy, (int)div64_u64(100 * busy, dt), >> max_busy[0], max_busy[1], max_busy[2], >> @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t) >> rps->cur_freq < rps->max_freq_softlimit) { >> rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; >> rps->pm_interval = 1; >> - schedule_work(&rps->work); >> + queue_work(gt->i915->unordered_wq, &rps->work); >> } else if (100 * busy < rps->power.down_threshold * dt && >> rps->cur_freq > rps->min_freq_softlimit) { >> rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; >> rps->pm_interval = 1; >> - schedule_work(&rps->work); >> + queue_work(gt->i915->unordered_wq, &rps->work); >> } else { >> rps->last_adj = 0; >> } >> @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val) >> } >> mutex_unlock(&rps->lock); >> if (boost) >> - schedule_work(&rps->work); >> + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); >> >> return 0; >> } >> @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq) >> if (!atomic_fetch_inc(&slpc->num_waiters)) { >> GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", >> rq->fence.context, rq->fence.seqno); >> - schedule_work(&slpc->boost_work); >> + queue_work(rps_to_gt(rps)->i915->unordered_wq, >> + &slpc->boost_work); >> } >> >> return; >> @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq) >> rq->fence.context, rq->fence.seqno); >> >> if (READ_ONCE(rps->cur_freq) < rps->boost_freq) >> - schedule_work(&rps->work); >> + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); >> >> WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ >> } >> @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) >> gen6_gt_pm_mask_irq(gt, events); >> >> rps->pm_iir |= events; >> - schedule_work(&rps->work); >> + queue_work(gt->i915->unordered_wq, &rps->work); >> } >> >> void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) >> @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) >> gen6_gt_pm_mask_irq(gt, events); >> rps->pm_iir |= events; >> >> - schedule_work(&rps->work); >> + queue_work(gt->i915->unordered_wq, &rps->work); >> spin_unlock(gt->irq_lock); >> } >> >> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c >> index 542ce6d2de19..78cdfc6f315f 100644 >> --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c >> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c >> @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt) >> >> /* Boost gpufreq to max [waitboost] and keep it fixed */ >> atomic_inc(>->rps.num_waiters); >> - schedule_work(>->rps.work); >> + queue_work(gt->i915->unordered_wq, >->rps.work); >> flush_work(>->rps.work); >> } >> >> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c >> index 522733a89946..88808aa85b26 100644 >> --- a/drivers/gpu/drm/i915/i915_driver.c >> +++ b/drivers/gpu/drm/i915/i915_driver.c >> @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) >> if (dev_priv->display.hotplug.dp_wq == NULL) >> goto out_free_wq; >> >> + /* >> + * The unordered i915 workqueue should be used for all work >> + * scheduling that do not require running in order. >> + */ > > Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know > when to use each that well so not fair on you to figure it out. > >> + dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0); >> + if (dev_priv->unordered_wq == NULL) >> + goto out_free_dp_wq; >> + >> return 0; >> >> +out_free_dp_wq: >> + destroy_workqueue(dev_priv->unordered_wq); > > Wrong wq. > >> out_free_wq: >> destroy_workqueue(dev_priv->wq); >> out_err: >> @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) >> >> static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) >> { >> + destroy_workqueue(dev_priv->unordered_wq); >> destroy_workqueue(dev_priv->display.hotplug.dp_wq); >> destroy_workqueue(dev_priv->wq); >> } >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 14c5338c96a6..8f2665e8afb5 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -259,6 +259,14 @@ struct drm_i915_private { >> */ >> struct workqueue_struct *wq; >> >> + /** >> + * unordered_wq - internal workqueue for unordered work >> + * >> + * This workqueue should be used for all unordered work >> + * scheduling within i915. > > Proably add something like ", which used to be scheduled on the > system_wq before moving to a driver instance due deprecation of > flush_scheduled_work()." > > That way we leave some note to the reader. > >> + */ >> + struct workqueue_struct *unordered_wq; >> + >> /* pm private clock gating functions */ >> const struct drm_i915_clock_gating_funcs *clock_gating_funcs; >> >> @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, >> >> #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \ >> GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) >> - > > Tidy if you can please. > >> #endif >> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c >> index 630a732aaecc..894068bb37b6 100644 >> --- a/drivers/gpu/drm/i915/i915_request.c >> +++ b/drivers/gpu/drm/i915/i915_request.c >> @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) >> >> if (!i915_request_completed(rq)) { >> if (llist_add(&rq->watchdog.link, >->watchdog.list)) >> - schedule_work(>->watchdog.work); >> + queue_work(gt->i915->unordered_wq, >->watchdog.work); >> } else { >> i915_request_put(rq); >> } >> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c >> index 40aafe676017..497ea21a347e 100644 >> --- a/drivers/gpu/drm/i915/intel_wakeref.c >> +++ b/drivers/gpu/drm/i915/intel_wakeref.c >> @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) >> >> /* Assume we are not in process context and so cannot sleep. */ >> if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { >> - mod_delayed_work(system_wq, &wf->work, >> + mod_delayed_work(wf->i915->wq, &wf->work, >> FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags)); >> return; >> } > > Pending the one or two details the non-display parts look good to me. > > Regards, > > Tvrtko
On Fri, 2023-05-26 at 14:30 +0300, Jani Nikula wrote: > On Wed, 24 May 2023, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote: > > On 24/05/2023 10:05, Luca Coelho wrote: > > > In order to avoid flush_scheduled_work() usage, add a dedicated > > > workqueue in the drm_i915_private structure. In this way, we don't > > > need to use the system queue anymore. > > > > > > This change is mostly mechanical and based on Tetsuo's original > > > patch[1]. > > > > > > Link: https://patchwork.freedesktop.org/series/114608/ [1] > > > Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > > Cc: Jani Nikula <jani.nikula@intel.com> > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > > > Signed-off-by: Luca Coelho <luciano.coelho@intel.com> > > > --- > > > drivers/gpu/drm/i915/display/intel_display.c | 5 ++-- > > > .../drm/i915/display/intel_display_driver.c | 2 +- > > > drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- > > > drivers/gpu/drm/i915/display/intel_dp.c | 2 +- > > > .../drm/i915/display/intel_dp_link_training.c | 3 ++- > > > drivers/gpu/drm/i915/display/intel_drrs.c | 4 +++- > > > drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- > > > drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- > > > drivers/gpu/drm/i915/display/intel_hdcp.c | 23 +++++++++++-------- > > > drivers/gpu/drm/i915/display/intel_hotplug.c | 18 ++++++++++----- > > > drivers/gpu/drm/i915/display/intel_opregion.c | 3 ++- > > > drivers/gpu/drm/i915/display/intel_pps.c | 4 +++- > > > drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++--- > > > .../drm/i915/gt/intel_execlists_submission.c | 5 ++-- > > > .../gpu/drm/i915/gt/intel_gt_buffer_pool.c | 10 ++++---- > > > drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- > > > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 10 ++++---- > > > drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- > > > drivers/gpu/drm/i915/gt/intel_rps.c | 20 ++++++++-------- > > > drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- > > > drivers/gpu/drm/i915/i915_driver.c | 11 +++++++++ > > > drivers/gpu/drm/i915/i915_drv.h | 9 +++++++- > > > drivers/gpu/drm/i915/i915_request.c | 2 +- > > > drivers/gpu/drm/i915/intel_wakeref.c | 2 +- > > > 24 files changed, 99 insertions(+), 55 deletions(-) > > > > I'll take a look at the gt/ parts only since display experts need to > > okay the point Daniel raise anyway. > > There's a bunch of lockdep failures in BAT. Are they possible related to > switching to unordered wq? I have not checked those results yet, because I'll send a new version soon anyway. I tested this before, but with the dedicated wakeref workqueues, and it all passed cleanly. So it _may_ be related to that change. I'll check it. -- Cheers, Luca.
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0490c6412ab5..f52b393382df 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7155,11 +7155,12 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, break; case FENCE_FREE: { + struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_atomic_helper *helper = - &to_i915(state->base.dev)->display.atomic_helper; + &i915->display.atomic_helper; if (llist_add(&state->freed, &helper->free_list)) - schedule_work(&helper->free_work); + queue_work(i915->unordered_wq, &helper->free_work); break; } } diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 60ce10fc7205..a9e36ddb6c1d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -435,7 +435,7 @@ void intel_display_driver_remove_noirq(struct drm_i915_private *i915) intel_unregister_dsm_handler(); /* flush any delayed tasks or pending work */ - flush_scheduled_work(); + flush_workqueue(i915->unordered_wq); intel_hdcp_component_fini(i915); diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 8a88de67ff0a..5f479f3828bb 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -1057,7 +1057,7 @@ void intel_dmc_init(struct drm_i915_private *i915) i915->display.dmc.dmc = dmc; drm_dbg_kms(&i915->drm, "Loading %s\n", dmc->fw_path); - schedule_work(&dmc->work); + queue_work(i915->unordered_wq, &dmc->work); return; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4bec8cd7979f..1466283cd569 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5251,7 +5251,7 @@ static void intel_dp_oob_hotplug_event(struct drm_connector *connector) spin_lock_irq(&i915->irq_lock); i915->display.hotplug.event_bits |= BIT(encoder->hpd_pin); spin_unlock_irq(&i915->irq_lock); - queue_delayed_work(system_wq, &i915->display.hotplug.hotplug_work, 0); + queue_delayed_work(i915->unordered_wq, &i915->display.hotplug.hotplug_work, 0); } static const struct drm_connector_funcs intel_dp_connector_funcs = { diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 0952a707358c..2cccc0a970f1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1064,6 +1064,7 @@ static void intel_dp_schedule_fallback_link_training(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_connector *intel_connector = intel_dp->attached_connector; + struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (!intel_digital_port_connected(&dp_to_dig_port(intel_dp)->base)) { lt_dbg(intel_dp, DP_PHY_DPRX, "Link Training failed on disconnected sink.\n"); @@ -1081,7 +1082,7 @@ static void intel_dp_schedule_fallback_link_training(struct intel_dp *intel_dp, } /* Schedule a Hotplug Uevent to userspace to start modeset */ - schedule_work(&intel_connector->modeset_retry_work); + queue_work(i915->unordered_wq, &intel_connector->modeset_retry_work); } /* Perform the link training on all LTTPRs and the DPRX on a link. */ diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 760e63cdc0c8..0d35b6be5b6a 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -111,7 +111,9 @@ static void intel_drrs_set_state(struct intel_crtc *crtc, static void intel_drrs_schedule_work(struct intel_crtc *crtc) { - mod_delayed_work(system_wq, &crtc->drrs.work, msecs_to_jiffies(1000)); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + mod_delayed_work(i915->unordered_wq, &crtc->drrs.work, msecs_to_jiffies(1000)); } static unsigned int intel_drrs_frontbuffer_bits(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 11bb8cf9c9d0..3ae284da78df 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1600,7 +1600,7 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) if (READ_ONCE(fbc->underrun_detected)) return; - schedule_work(&fbc->underrun_work); + queue_work(fbc->i915->unordered_wq, &fbc->underrun_work); } /** diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index aab1ae74a8f7..c88f13d319b9 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -689,7 +689,8 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous /* Don't block our own workqueue as this can * be run in parallel with other i915.ko tasks. */ - schedule_work(&dev_priv->display.fbdev.suspend_work); + queue_work(dev_priv->unordered_wq, + &dev_priv->display.fbdev.suspend_work); return; } } diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index dd539106ee5a..aa74b7b7b266 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -983,6 +983,7 @@ static void intel_hdcp_update_value(struct intel_connector *connector, struct drm_device *dev = connector->base.dev; struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; + struct drm_i915_private *i915 = to_i915(connector->base.dev); drm_WARN_ON(connector->base.dev, !mutex_is_locked(&hdcp->mutex)); @@ -1001,7 +1002,7 @@ static void intel_hdcp_update_value(struct intel_connector *connector, hdcp->value = value; if (update_property) { drm_connector_get(&connector->base); - schedule_work(&hdcp->prop_work); + queue_work(i915->unordered_wq, &hdcp->prop_work); } } @@ -2090,16 +2091,17 @@ static void intel_hdcp_check_work(struct work_struct *work) struct intel_hdcp, check_work); struct intel_connector *connector = intel_hdcp_to_connector(hdcp); + struct drm_i915_private *i915 = to_i915(connector->base.dev); if (drm_connector_is_unregistered(&connector->base)) return; if (!intel_hdcp2_check_link(connector)) - schedule_delayed_work(&hdcp->check_work, - DRM_HDCP2_CHECK_PERIOD_MS); + queue_delayed_work(i915->unordered_wq, &hdcp->check_work, + DRM_HDCP2_CHECK_PERIOD_MS); else if (!intel_hdcp_check_link(connector)) - schedule_delayed_work(&hdcp->check_work, - DRM_HDCP_CHECK_PERIOD_MS); + queue_delayed_work(i915->unordered_wq, &hdcp->check_work, + DRM_HDCP_CHECK_PERIOD_MS); } static int i915_hdcp_component_bind(struct device *i915_kdev, @@ -2398,7 +2400,8 @@ int intel_hdcp_enable(struct intel_atomic_state *state, } if (!ret) { - schedule_delayed_work(&hdcp->check_work, check_link_interval); + queue_delayed_work(dev_priv->unordered_wq, &hdcp->check_work, + check_link_interval); intel_hdcp_update_value(connector, DRM_MODE_CONTENT_PROTECTION_ENABLED, true); @@ -2447,6 +2450,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, to_intel_connector(conn_state->connector); struct intel_hdcp *hdcp = &connector->hdcp; bool content_protection_type_changed, desired_and_not_enabled = false; + struct drm_i915_private *i915 = to_i915(connector->base.dev); if (!connector->hdcp.shim) return; @@ -2473,7 +2477,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, mutex_lock(&hdcp->mutex); hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; drm_connector_get(&connector->base); - schedule_work(&hdcp->prop_work); + queue_work(i915->unordered_wq, &hdcp->prop_work); mutex_unlock(&hdcp->mutex); } @@ -2490,7 +2494,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, */ if (!desired_and_not_enabled && !content_protection_type_changed) { drm_connector_get(&connector->base); - schedule_work(&hdcp->prop_work); + queue_work(i915->unordered_wq, &hdcp->prop_work); } } @@ -2602,6 +2606,7 @@ void intel_hdcp_atomic_check(struct drm_connector *connector, void intel_hdcp_handle_cp_irq(struct intel_connector *connector) { struct intel_hdcp *hdcp = &connector->hdcp; + struct drm_i915_private *i915 = to_i915(connector->base.dev); if (!hdcp->shim) return; @@ -2609,5 +2614,5 @@ void intel_hdcp_handle_cp_irq(struct intel_connector *connector) atomic_inc(&connector->hdcp.cp_irq_count); wake_up_all(&connector->hdcp.cp_irq_queue); - schedule_delayed_work(&hdcp->check_work, 0); + queue_delayed_work(i915->unordered_wq, &hdcp->check_work, 0); } diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 23a5e1a875f1..1160fa20433b 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -212,7 +212,8 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv) /* Enable polling and queue hotplug re-enabling. */ if (hpd_disabled) { drm_kms_helper_poll_enable(&dev_priv->drm); - mod_delayed_work(system_wq, &dev_priv->display.hotplug.reenable_work, + mod_delayed_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.reenable_work, msecs_to_jiffies(HPD_STORM_REENABLE_DELAY)); } } @@ -339,7 +340,8 @@ static void i915_digport_work_func(struct work_struct *work) spin_lock_irq(&dev_priv->irq_lock); dev_priv->display.hotplug.event_bits |= old_bits; spin_unlock_irq(&dev_priv->irq_lock); - queue_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work, 0); + queue_delayed_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.hotplug_work, 0); } } @@ -446,7 +448,8 @@ static void i915_hotplug_work_func(struct work_struct *work) dev_priv->display.hotplug.retry_bits |= retry; spin_unlock_irq(&dev_priv->irq_lock); - mod_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work, + mod_delayed_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.hotplug_work, msecs_to_jiffies(HPD_RETRY_DELAY)); } } @@ -577,7 +580,8 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (queue_dig) queue_work(dev_priv->display.hotplug.dp_wq, &dev_priv->display.hotplug.dig_port_work); if (queue_hp) - queue_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work, 0); + queue_delayed_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.hotplug_work, 0); } /** @@ -687,7 +691,8 @@ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv) * As well, there's no issue if we race here since we always reschedule * this worker anyway */ - schedule_work(&dev_priv->display.hotplug.poll_init_work); + queue_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.poll_init_work); } /** @@ -715,7 +720,8 @@ void intel_hpd_poll_disable(struct drm_i915_private *dev_priv) return; WRITE_ONCE(dev_priv->display.hotplug.poll_enabled, false); - schedule_work(&dev_priv->display.hotplug.poll_init_work); + queue_work(dev_priv->unordered_wq, + &dev_priv->display.hotplug.poll_init_work); } void intel_hpd_init_early(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index b7973a05d022..84078fb82b2f 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -635,7 +635,8 @@ static void asle_work(struct work_struct *work) void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) { if (dev_priv->display.opregion.asle) - schedule_work(&dev_priv->display.opregion.asle_work); + queue_work(dev_priv->unordered_wq, + &dev_priv->display.opregion.asle_work); } #define ACPI_EV_DISPLAY_SWITCH (1<<0) diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 5e7ba594e7e7..73f0f1714b37 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -867,6 +867,7 @@ static void edp_panel_vdd_work(struct work_struct *__work) static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); unsigned long delay; /* @@ -882,7 +883,8 @@ static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) * operations. */ delay = msecs_to_jiffies(intel_dp->pps.panel_power_cycle_delay * 5); - schedule_delayed_work(&intel_dp->pps.panel_vdd_work, delay); + queue_delayed_work(i915->unordered_wq, + &intel_dp->pps.panel_vdd_work, delay); } /* diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index ea0389c5f656..d58ed9b62e67 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -341,7 +341,7 @@ void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir) */ intel_de_rmw(dev_priv, imr_reg, 0, psr_irq_psr_error_bit_get(intel_dp)); - schedule_work(&intel_dp->psr.work); + queue_work(dev_priv->unordered_wq, &intel_dp->psr.work); } } @@ -2440,6 +2440,8 @@ static void tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits, enum fb_op_origin origin) { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + if (!intel_dp->psr.dc3co_exitline || !intel_dp->psr.psr2_enabled || !intel_dp->psr.active) return; @@ -2453,7 +2455,7 @@ tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits, return; tgl_psr2_enable_dc3co(intel_dp); - mod_delayed_work(system_wq, &intel_dp->psr.dc3co_work, + mod_delayed_work(i915->unordered_wq, &intel_dp->psr.dc3co_work, intel_dp->psr.dc3co_exit_delay); } @@ -2493,7 +2495,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) psr_force_hw_tracking_exit(intel_dp); if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) - schedule_work(&intel_dp->psr.work); + queue_work(dev_priv->unordered_wq, &intel_dp->psr.work); } } diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 750326434677..2ebd937f3b4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine) static void execlists_capture(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; struct execlists_capture *cap; if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine) goto err_rq; INIT_WORK(&cap->work, execlists_capture_work); - schedule_work(&cap->work); + queue_work(i915->unordered_wq, &cap->work); return; err_rq: @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref) * lock, we can delegate the free of the engine to an RCU worker. */ INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); - queue_rcu_work(system_wq, &ve->rcu); + queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu); } static void virtual_engine_initial_hint(struct virtual_engine *ve) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index cadfd85785b1..86b5a9ba323d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk) { struct intel_gt_buffer_pool *pool = container_of(wrk, typeof(*pool), work.work); + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); if (pool_free_older_than(pool, HZ)) - schedule_delayed_work(&pool->work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(gt->i915->unordered_wq, &pool->work, + round_jiffies_up_relative(HZ)); } static void pool_retire(struct i915_active *ref) @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref) struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); struct intel_gt_buffer_pool *pool = node->pool; + struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool); struct list_head *list = bucket_for_size(pool, node->obj->base.size); unsigned long flags; @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref) WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ spin_unlock_irqrestore(&pool->lock, flags); - schedule_delayed_work(&pool->work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(gt->i915->unordered_wq, &pool->work, + round_jiffies_up_relative(HZ)); } void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 8f888d36f16d..62fd00c9e519 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT) gt->i915->l3_parity.which_slice |= 1 << 0; - schedule_work(>->i915->l3_parity.error_work); + queue_work(gt->i915->unordered_wq, >->i915->l3_parity.error_work); } void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 1dfd01668c79..d1a382dfaa1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, GEM_BUG_ON(intel_engine_is_virtual(engine)); if (add_retire(engine, tl)) - schedule_work(&engine->retire_work); + queue_work(engine->i915->unordered_wq, &engine->retire_work); } void intel_engine_init_retire(struct intel_engine_cs *engine) @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work) struct intel_gt *gt = container_of(work, typeof(*gt), requests.retire_work.work); - schedule_delayed_work(>->requests.retire_work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, + round_jiffies_up_relative(HZ)); intel_gt_retire_requests(gt); } @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt) void intel_gt_unpark_requests(struct intel_gt *gt) { - schedule_delayed_work(>->requests.retire_work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(gt->i915->unordered_wq, >->requests.retire_work, + round_jiffies_up_relative(HZ)); } void intel_gt_fini_requests(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 195ff72d7a14..e2152f75ba2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w, w->name = name; INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); - schedule_delayed_work(&w->work, timeout); + queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout); } void __intel_fini_wedge(struct intel_wedge_me *w) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e68a99205599..e92e626d4994 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_gt *gt = rps_to_gt(rps); struct intel_engine_cs *engine; ktime_t dt, last, timestamp; enum intel_engine_id id; s64 max_busy[3] = {}; timestamp = 0; - for_each_engine(engine, rps_to_gt(rps), id) { + for_each_engine(engine, gt, id) { s64 busy; int i; @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t) busy += div_u64(max_busy[i], 1 << i); } - GT_TRACE(rps_to_gt(rps), + GT_TRACE(gt, "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", busy, (int)div64_u64(100 * busy, dt), max_busy[0], max_busy[1], max_busy[2], @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t) rps->cur_freq < rps->max_freq_softlimit) { rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; rps->pm_interval = 1; - schedule_work(&rps->work); + queue_work(gt->i915->unordered_wq, &rps->work); } else if (100 * busy < rps->power.down_threshold * dt && rps->cur_freq > rps->min_freq_softlimit) { rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; rps->pm_interval = 1; - schedule_work(&rps->work); + queue_work(gt->i915->unordered_wq, &rps->work); } else { rps->last_adj = 0; } @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val) } mutex_unlock(&rps->lock); if (boost) - schedule_work(&rps->work); + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); return 0; } @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq) if (!atomic_fetch_inc(&slpc->num_waiters)) { GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); - schedule_work(&slpc->boost_work); + queue_work(rps_to_gt(rps)->i915->unordered_wq, + &slpc->boost_work); } return; @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq) rq->fence.context, rq->fence.seqno); if (READ_ONCE(rps->cur_freq) < rps->boost_freq) - schedule_work(&rps->work); + queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work); WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ } @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; - schedule_work(&rps->work); + queue_work(gt->i915->unordered_wq, &rps->work); } void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; - schedule_work(&rps->work); + queue_work(gt->i915->unordered_wq, &rps->work); spin_unlock(gt->irq_lock); } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 542ce6d2de19..78cdfc6f315f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt) /* Boost gpufreq to max [waitboost] and keep it fixed */ atomic_inc(>->rps.num_waiters); - schedule_work(>->rps.work); + queue_work(gt->i915->unordered_wq, >->rps.work); flush_work(>->rps.work); } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 522733a89946..88808aa85b26 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) if (dev_priv->display.hotplug.dp_wq == NULL) goto out_free_wq; + /* + * The unordered i915 workqueue should be used for all work + * scheduling that do not require running in order. + */ + dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0); + if (dev_priv->unordered_wq == NULL) + goto out_free_dp_wq; + return 0; +out_free_dp_wq: + destroy_workqueue(dev_priv->unordered_wq); out_free_wq: destroy_workqueue(dev_priv->wq); out_err: @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { + destroy_workqueue(dev_priv->unordered_wq); destroy_workqueue(dev_priv->display.hotplug.dp_wq); destroy_workqueue(dev_priv->wq); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 14c5338c96a6..8f2665e8afb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -259,6 +259,14 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /** + * unordered_wq - internal workqueue for unordered work + * + * This workqueue should be used for all unordered work + * scheduling within i915. + */ + struct workqueue_struct *unordered_wq; + /* pm private clock gating functions */ const struct drm_i915_clock_gating_funcs *clock_gating_funcs; @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \ GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) - #endif diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 630a732aaecc..894068bb37b6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) if (!i915_request_completed(rq)) { if (llist_add(&rq->watchdog.link, >->watchdog.list)) - schedule_work(>->watchdog.work); + queue_work(gt->i915->unordered_wq, >->watchdog.work); } else { i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 40aafe676017..497ea21a347e 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) /* Assume we are not in process context and so cannot sleep. */ if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { - mod_delayed_work(system_wq, &wf->work, + mod_delayed_work(wf->i915->wq, &wf->work, FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags)); return; }
In order to avoid flush_scheduled_work() usage, add a dedicated workqueue in the drm_i915_private structure. In this way, we don't need to use the system queue anymore. This change is mostly mechanical and based on Tetsuo's original patch[1]. Link: https://patchwork.freedesktop.org/series/114608/ [1] Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Jani Nikula <jani.nikula@intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Luca Coelho <luciano.coelho@intel.com> --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++-- .../drm/i915/display/intel_display_driver.c | 2 +- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- .../drm/i915/display/intel_dp_link_training.c | 3 ++- drivers/gpu/drm/i915/display/intel_drrs.c | 4 +++- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- drivers/gpu/drm/i915/display/intel_hdcp.c | 23 +++++++++++-------- drivers/gpu/drm/i915/display/intel_hotplug.c | 18 ++++++++++----- drivers/gpu/drm/i915/display/intel_opregion.c | 3 ++- drivers/gpu/drm/i915/display/intel_pps.c | 4 +++- drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++--- .../drm/i915/gt/intel_execlists_submission.c | 5 ++-- .../gpu/drm/i915/gt/intel_gt_buffer_pool.c | 10 ++++---- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 10 ++++---- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 20 ++++++++-------- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- drivers/gpu/drm/i915/i915_driver.c | 11 +++++++++ drivers/gpu/drm/i915/i915_drv.h | 9 +++++++- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/intel_wakeref.c | 2 +- 24 files changed, 99 insertions(+), 55 deletions(-)