diff mbox series

[v2,2/3] drm/i915: add a dedicated workqueue inside drm_i915_private

Message ID 20230524090521.596399-3-luciano.coelho@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: implement internal workqueues | expand

Commit Message

Luca Coelho May 24, 2023, 9:05 a.m. UTC
In order to avoid flush_scheduled_work() usage, add a dedicated
workqueue in the drm_i915_private structure.  In this way, we don't
need to use the system queue anymore.

This change is mostly mechanical and based on Tetsuo's original
patch[1].

Link: https://patchwork.freedesktop.org/series/114608/ [1]
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  5 ++--
 .../drm/i915/display/intel_display_driver.c   |  2 +-
 drivers/gpu/drm/i915/display/intel_dmc.c      |  2 +-
 drivers/gpu/drm/i915/display/intel_dp.c       |  2 +-
 .../drm/i915/display/intel_dp_link_training.c |  3 ++-
 drivers/gpu/drm/i915/display/intel_drrs.c     |  4 +++-
 drivers/gpu/drm/i915/display/intel_fbc.c      |  2 +-
 drivers/gpu/drm/i915/display/intel_fbdev.c    |  3 ++-
 drivers/gpu/drm/i915/display/intel_hdcp.c     | 23 +++++++++++--------
 drivers/gpu/drm/i915/display/intel_hotplug.c  | 18 ++++++++++-----
 drivers/gpu/drm/i915/display/intel_opregion.c |  3 ++-
 drivers/gpu/drm/i915/display/intel_pps.c      |  4 +++-
 drivers/gpu/drm/i915/display/intel_psr.c      |  8 ++++---
 .../drm/i915/gt/intel_execlists_submission.c  |  5 ++--
 .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 10 ++++----
 drivers/gpu/drm/i915/gt/intel_gt_irq.c        |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 10 ++++----
 drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +-
 drivers/gpu/drm/i915/gt/intel_rps.c           | 20 ++++++++--------
 drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  2 +-
 drivers/gpu/drm/i915/i915_driver.c            | 11 +++++++++
 drivers/gpu/drm/i915/i915_drv.h               |  9 +++++++-
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 drivers/gpu/drm/i915/intel_wakeref.c          |  2 +-
 24 files changed, 99 insertions(+), 55 deletions(-)

Comments

Tvrtko Ursulin May 24, 2023, 11 a.m. UTC | #1
On 24/05/2023 10:05, Luca Coelho wrote:
> In order to avoid flush_scheduled_work() usage, add a dedicated
> workqueue in the drm_i915_private structure.  In this way, we don't
> need to use the system queue anymore.
> 
> This change is mostly mechanical and based on Tetsuo's original
> patch[1].
> 
> Link: https://patchwork.freedesktop.org/series/114608/ [1]
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_display.c  |  5 ++--
>   .../drm/i915/display/intel_display_driver.c   |  2 +-
>   drivers/gpu/drm/i915/display/intel_dmc.c      |  2 +-
>   drivers/gpu/drm/i915/display/intel_dp.c       |  2 +-
>   .../drm/i915/display/intel_dp_link_training.c |  3 ++-
>   drivers/gpu/drm/i915/display/intel_drrs.c     |  4 +++-
>   drivers/gpu/drm/i915/display/intel_fbc.c      |  2 +-
>   drivers/gpu/drm/i915/display/intel_fbdev.c    |  3 ++-
>   drivers/gpu/drm/i915/display/intel_hdcp.c     | 23 +++++++++++--------
>   drivers/gpu/drm/i915/display/intel_hotplug.c  | 18 ++++++++++-----
>   drivers/gpu/drm/i915/display/intel_opregion.c |  3 ++-
>   drivers/gpu/drm/i915/display/intel_pps.c      |  4 +++-
>   drivers/gpu/drm/i915/display/intel_psr.c      |  8 ++++---
>   .../drm/i915/gt/intel_execlists_submission.c  |  5 ++--
>   .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 10 ++++----
>   drivers/gpu/drm/i915/gt/intel_gt_irq.c        |  2 +-
>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 10 ++++----
>   drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +-
>   drivers/gpu/drm/i915/gt/intel_rps.c           | 20 ++++++++--------
>   drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  2 +-
>   drivers/gpu/drm/i915/i915_driver.c            | 11 +++++++++
>   drivers/gpu/drm/i915/i915_drv.h               |  9 +++++++-
>   drivers/gpu/drm/i915/i915_request.c           |  2 +-
>   drivers/gpu/drm/i915/intel_wakeref.c          |  2 +-
>   24 files changed, 99 insertions(+), 55 deletions(-)

I'll take a look at the gt/ parts only since display experts need to 
okay the point Daniel raise anyway.

8<
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 750326434677..2ebd937f3b4c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine)
>   
>   static void execlists_capture(struct intel_engine_cs *engine)
>   {
> +	struct drm_i915_private *i915 = engine->i915;
>   	struct execlists_capture *cap;
>   
>   	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
> @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
>   		goto err_rq;
>   
>   	INIT_WORK(&cap->work, execlists_capture_work);
> -	schedule_work(&cap->work);
> +	queue_work(i915->unordered_wq, &cap->work);
>   	return;
>   
>   err_rq:
> @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref)
>   	 * lock, we can delegate the free of the engine to an RCU worker.
>   	 */
>   	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
> -	queue_rcu_work(system_wq, &ve->rcu);
> +	queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu);
>   }
>   
>   static void virtual_engine_initial_hint(struct virtual_engine *ve)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> index cadfd85785b1..86b5a9ba323d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk)
>   {
>   	struct intel_gt_buffer_pool *pool =
>   		container_of(wrk, typeof(*pool), work.work);
> +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);

Okay. Or alternatively, pool = &gt->buffer_pool, might read simpler...

>   
>   	if (pool_free_older_than(pool, HZ))
> -		schedule_delayed_work(&pool->work,
> -				      round_jiffies_up_relative(HZ));
> +		queue_delayed_work(gt->i915->unordered_wq, &pool->work,
> +				   round_jiffies_up_relative(HZ));
>   }
>   
>   static void pool_retire(struct i915_active *ref)
> @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref)
>   	struct intel_gt_buffer_pool_node *node =
>   		container_of(ref, typeof(*node), active);
>   	struct intel_gt_buffer_pool *pool = node->pool;
> +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);

... although I am beginning to wonder if intel_gt_buffer_pool shouldn't 
just gain a gt backpointer? That would decouple things more instead of 
tying the implementation with intel_gt implicitly. Not a strong 
direction though.

>   	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
>   	unsigned long flags;
>   
> @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref)
>   	WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
>   	spin_unlock_irqrestore(&pool->lock, flags);
>   
> -	schedule_delayed_work(&pool->work,
> -			      round_jiffies_up_relative(HZ));
> +	queue_delayed_work(gt->i915->unordered_wq, &pool->work,
> +			   round_jiffies_up_relative(HZ));
>   }
>   
>   void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index 8f888d36f16d..62fd00c9e519 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
>   	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
>   		gt->i915->l3_parity.which_slice |= 1 << 0;
>   
> -	schedule_work(&gt->i915->l3_parity.error_work);
> +	queue_work(gt->i915->unordered_wq, &gt->i915->l3_parity.error_work);
>   }
>   
>   void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> index 1dfd01668c79..d1a382dfaa1d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine,
>   	GEM_BUG_ON(intel_engine_is_virtual(engine));
>   
>   	if (add_retire(engine, tl))
> -		schedule_work(&engine->retire_work);
> +		queue_work(engine->i915->unordered_wq, &engine->retire_work);
>   }
>   
>   void intel_engine_init_retire(struct intel_engine_cs *engine)
> @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work)
>   	struct intel_gt *gt =
>   		container_of(work, typeof(*gt), requests.retire_work.work);
>   
> -	schedule_delayed_work(&gt->requests.retire_work,
> -			      round_jiffies_up_relative(HZ));
> +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
> +			   round_jiffies_up_relative(HZ));
>   	intel_gt_retire_requests(gt);
>   }
>   
> @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt)
>   
>   void intel_gt_unpark_requests(struct intel_gt *gt)
>   {
> -	schedule_delayed_work(&gt->requests.retire_work,
> -			      round_jiffies_up_relative(HZ));
> +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
> +			   round_jiffies_up_relative(HZ));
>   }
>   
>   void intel_gt_fini_requests(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 195ff72d7a14..e2152f75ba2e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w,
>   	w->name = name;
>   
>   	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
> -	schedule_delayed_work(&w->work, timeout);
> +	queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout);
>   }
>   
>   void __intel_fini_wedge(struct intel_wedge_me *w)
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
> index e68a99205599..e92e626d4994 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
>   static void rps_timer(struct timer_list *t)
>   {
>   	struct intel_rps *rps = from_timer(rps, t, timer);
> +	struct intel_gt *gt = rps_to_gt(rps);
>   	struct intel_engine_cs *engine;
>   	ktime_t dt, last, timestamp;
>   	enum intel_engine_id id;
>   	s64 max_busy[3] = {};
>   
>   	timestamp = 0;
> -	for_each_engine(engine, rps_to_gt(rps), id) {
> +	for_each_engine(engine, gt, id) {
>   		s64 busy;
>   		int i;
>   
> @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t)
>   
>   			busy += div_u64(max_busy[i], 1 << i);
>   		}
> -		GT_TRACE(rps_to_gt(rps),
> +		GT_TRACE(gt,
>   			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
>   			 busy, (int)div64_u64(100 * busy, dt),
>   			 max_busy[0], max_busy[1], max_busy[2],
> @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t)
>   		    rps->cur_freq < rps->max_freq_softlimit) {
>   			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
>   			rps->pm_interval = 1;
> -			schedule_work(&rps->work);
> +			queue_work(gt->i915->unordered_wq, &rps->work);
>   		} else if (100 * busy < rps->power.down_threshold * dt &&
>   			   rps->cur_freq > rps->min_freq_softlimit) {
>   			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
>   			rps->pm_interval = 1;
> -			schedule_work(&rps->work);
> +			queue_work(gt->i915->unordered_wq, &rps->work);
>   		} else {
>   			rps->last_adj = 0;
>   		}
> @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
>   	}
>   	mutex_unlock(&rps->lock);
>   	if (boost)
> -		schedule_work(&rps->work);
> +		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
>   
>   	return 0;
>   }
> @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq)
>   			if (!atomic_fetch_inc(&slpc->num_waiters)) {
>   				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
>   					 rq->fence.context, rq->fence.seqno);
> -				schedule_work(&slpc->boost_work);
> +				queue_work(rps_to_gt(rps)->i915->unordered_wq,
> +					   &slpc->boost_work);
>   			}
>   
>   			return;
> @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq)
>   			 rq->fence.context, rq->fence.seqno);
>   
>   		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> -			schedule_work(&rps->work);
> +			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
>   
>   		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
>   	}
> @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>   	gen6_gt_pm_mask_irq(gt, events);
>   
>   	rps->pm_iir |= events;
> -	schedule_work(&rps->work);
> +	queue_work(gt->i915->unordered_wq, &rps->work);
>   }
>   
>   void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
> @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>   		gen6_gt_pm_mask_irq(gt, events);
>   		rps->pm_iir |= events;
>   
> -		schedule_work(&rps->work);
> +		queue_work(gt->i915->unordered_wq, &rps->work);
>   		spin_unlock(gt->irq_lock);
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> index 542ce6d2de19..78cdfc6f315f 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt)
>   
>   	/* Boost gpufreq to max [waitboost] and keep it fixed */
>   	atomic_inc(&gt->rps.num_waiters);
> -	schedule_work(&gt->rps.work);
> +	queue_work(gt->i915->unordered_wq, &gt->rps.work);
>   	flush_work(&gt->rps.work);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 522733a89946..88808aa85b26 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
>   	if (dev_priv->display.hotplug.dp_wq == NULL)
>   		goto out_free_wq;
>   
> +	/*
> +	 * The unordered i915 workqueue should be used for all work
> +	 * scheduling that do not require running in order.
> +	 */

Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know 
when to use each that well so not fair on you to figure it out.

> +	dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0);
> +	if (dev_priv->unordered_wq == NULL)
> +		goto out_free_dp_wq;
> +
>   	return 0;
>   
> +out_free_dp_wq:
> +	destroy_workqueue(dev_priv->unordered_wq);

Wrong wq.

>   out_free_wq:
>   	destroy_workqueue(dev_priv->wq);
>   out_err:
> @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
>   
>   static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
>   {
> +	destroy_workqueue(dev_priv->unordered_wq);
>   	destroy_workqueue(dev_priv->display.hotplug.dp_wq);
>   	destroy_workqueue(dev_priv->wq);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 14c5338c96a6..8f2665e8afb5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -259,6 +259,14 @@ struct drm_i915_private {
>   	 */
>   	struct workqueue_struct *wq;
>   
> +	/**
> +	 * unordered_wq - internal workqueue for unordered work
> +	 *
> +	 * This workqueue should be used for all unordered work
> +	 * scheduling within i915.

Proably add something like ", which used to be scheduled on the 
system_wq before moving to a driver instance due deprecation of
flush_scheduled_work()."

That way we leave some note to the reader.

> +	 */
> +	struct workqueue_struct *unordered_wq;
> +
>   	/* pm private clock gating functions */
>   	const struct drm_i915_clock_gating_funcs *clock_gating_funcs;
>   
> @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>   
>   #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>   				       GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> -

Tidy if you can please.

>   #endif
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 630a732aaecc..894068bb37b6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
>   
>   	if (!i915_request_completed(rq)) {
>   		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
> -			schedule_work(&gt->watchdog.work);
> +			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
>   	} else {
>   		i915_request_put(rq);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> index 40aafe676017..497ea21a347e 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
>   
>   	/* Assume we are not in process context and so cannot sleep. */
>   	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
> -		mod_delayed_work(system_wq, &wf->work,
> +		mod_delayed_work(wf->i915->wq, &wf->work,
>   				 FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags));
>   		return;
>   	}

Pending the one or two details the non-display parts look good to me.

Regards,

Tvrtko
Tvrtko Ursulin May 24, 2023, 11:05 a.m. UTC | #2
On 24/05/2023 12:00, Tvrtko Ursulin wrote:
> 
> On 24/05/2023 10:05, Luca Coelho wrote:

8<

>>       if (pool_free_older_than(pool, HZ))
>> -        schedule_delayed_work(&pool->work,
>> -                      round_jiffies_up_relative(HZ));
>> +        queue_delayed_work(gt->i915->unordered_wq, &pool->work,
>> +                   round_jiffies_up_relative(HZ));
>>   }
>>   static void pool_retire(struct i915_active *ref)
>> @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref)
>>       struct intel_gt_buffer_pool_node *node =
>>           container_of(ref, typeof(*node), active);
>>       struct intel_gt_buffer_pool *pool = node->pool;
>> +    struct intel_gt *gt = container_of(pool, struct intel_gt, 
>> buffer_pool);
> 
> ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't 
> just gain a gt backpointer? That would decouple things more instead of 
> tying the implementation with intel_gt implicitly. Not a strong 
> direction though.

Never mind on this point, code already assumes this relationships for 
instance in node_create().

Regards,

Tvrtko
Luca Coelho May 24, 2023, 12:25 p.m. UTC | #3
On Wed, 2023-05-24 at 12:00 +0100, Tvrtko Ursulin wrote:
> On 24/05/2023 10:05, Luca Coelho wrote:
> > In order to avoid flush_scheduled_work() usage, add a dedicated
> > workqueue in the drm_i915_private structure.  In this way, we don't
> > need to use the system queue anymore.
> > 
> > This change is mostly mechanical and based on Tetsuo's original
> > patch[1].
> > 
> > Link: https://patchwork.freedesktop.org/series/114608/ [1]
> > Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
> > ---
> >   drivers/gpu/drm/i915/display/intel_display.c  |  5 ++--
> >   .../drm/i915/display/intel_display_driver.c   |  2 +-
> >   drivers/gpu/drm/i915/display/intel_dmc.c      |  2 +-
> >   drivers/gpu/drm/i915/display/intel_dp.c       |  2 +-
> >   .../drm/i915/display/intel_dp_link_training.c |  3 ++-
> >   drivers/gpu/drm/i915/display/intel_drrs.c     |  4 +++-
> >   drivers/gpu/drm/i915/display/intel_fbc.c      |  2 +-
> >   drivers/gpu/drm/i915/display/intel_fbdev.c    |  3 ++-
> >   drivers/gpu/drm/i915/display/intel_hdcp.c     | 23 +++++++++++--------
> >   drivers/gpu/drm/i915/display/intel_hotplug.c  | 18 ++++++++++-----
> >   drivers/gpu/drm/i915/display/intel_opregion.c |  3 ++-
> >   drivers/gpu/drm/i915/display/intel_pps.c      |  4 +++-
> >   drivers/gpu/drm/i915/display/intel_psr.c      |  8 ++++---
> >   .../drm/i915/gt/intel_execlists_submission.c  |  5 ++--
> >   .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 10 ++++----
> >   drivers/gpu/drm/i915/gt/intel_gt_irq.c        |  2 +-
> >   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 10 ++++----
> >   drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +-
> >   drivers/gpu/drm/i915/gt/intel_rps.c           | 20 ++++++++--------
> >   drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  2 +-
> >   drivers/gpu/drm/i915/i915_driver.c            | 11 +++++++++
> >   drivers/gpu/drm/i915/i915_drv.h               |  9 +++++++-
> >   drivers/gpu/drm/i915/i915_request.c           |  2 +-
> >   drivers/gpu/drm/i915/intel_wakeref.c          |  2 +-
> >   24 files changed, 99 insertions(+), 55 deletions(-)
> 
> I'll take a look at the gt/ parts only since display experts need to 
> okay the point Daniel raise anyway.

Thanks!



> 8<
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index 750326434677..2ebd937f3b4c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine)
> >   
> >   static void execlists_capture(struct intel_engine_cs *engine)
> >   {
> > +	struct drm_i915_private *i915 = engine->i915;
> >   	struct execlists_capture *cap;
> >   
> >   	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
> > @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
> >   		goto err_rq;
> >   
> >   	INIT_WORK(&cap->work, execlists_capture_work);
> > -	schedule_work(&cap->work);
> > +	queue_work(i915->unordered_wq, &cap->work);
> >   	return;
> >   
> >   err_rq:
> > @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref)
> >   	 * lock, we can delegate the free of the engine to an RCU worker.
> >   	 */
> >   	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
> > -	queue_rcu_work(system_wq, &ve->rcu);
> > +	queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu);
> >   }
> >   
> >   static void virtual_engine_initial_hint(struct virtual_engine *ve)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> > index cadfd85785b1..86b5a9ba323d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> > @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk)
> >   {
> >   	struct intel_gt_buffer_pool *pool =
> >   		container_of(wrk, typeof(*pool), work.work);
> > +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
> 
> Okay. Or alternatively, pool = &gt->buffer_pool, might read simpler...

Sorry, I don't follow.  wrk is inside intel_gt_buffer_pool, so we can
derive pool from work.  And then we know that pool is inside intel_gt,
so we derive gt from that.  How would I get gt first and derive pool
from that?


> >   
> >   	if (pool_free_older_than(pool, HZ))
> > -		schedule_delayed_work(&pool->work,
> > -				      round_jiffies_up_relative(HZ));
> > +		queue_delayed_work(gt->i915->unordered_wq, &pool->work,
> > +				   round_jiffies_up_relative(HZ));
> >   }
> >   
> >   static void pool_retire(struct i915_active *ref)
> > @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref)
> >   	struct intel_gt_buffer_pool_node *node =
> >   		container_of(ref, typeof(*node), active);
> >   	struct intel_gt_buffer_pool *pool = node->pool;
> > +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
> 
> ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't 
> just gain a gt backpointer? That would decouple things more instead of 
> tying the implementation with intel_gt implicitly. Not a strong 
> direction though.
> 
> >   	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
> >   	unsigned long flags;
> >   
> > @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref)
> >   	WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
> >   	spin_unlock_irqrestore(&pool->lock, flags);
> >   
> > -	schedule_delayed_work(&pool->work,
> > -			      round_jiffies_up_relative(HZ));
> > +	queue_delayed_work(gt->i915->unordered_wq, &pool->work,
> > +			   round_jiffies_up_relative(HZ));
> >   }
> >   
> >   void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > index 8f888d36f16d..62fd00c9e519 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> > @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
> >   	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
> >   		gt->i915->l3_parity.which_slice |= 1 << 0;
> >   
> > -	schedule_work(&gt->i915->l3_parity.error_work);
> > +	queue_work(gt->i915->unordered_wq, &gt->i915->l3_parity.error_work);
> >   }
> >   
> >   void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > index 1dfd01668c79..d1a382dfaa1d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> > @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine,
> >   	GEM_BUG_ON(intel_engine_is_virtual(engine));
> >   
> >   	if (add_retire(engine, tl))
> > -		schedule_work(&engine->retire_work);
> > +		queue_work(engine->i915->unordered_wq, &engine->retire_work);
> >   }
> >   
> >   void intel_engine_init_retire(struct intel_engine_cs *engine)
> > @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work)
> >   	struct intel_gt *gt =
> >   		container_of(work, typeof(*gt), requests.retire_work.work);
> >   
> > -	schedule_delayed_work(&gt->requests.retire_work,
> > -			      round_jiffies_up_relative(HZ));
> > +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
> > +			   round_jiffies_up_relative(HZ));
> >   	intel_gt_retire_requests(gt);
> >   }
> >   
> > @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt)
> >   
> >   void intel_gt_unpark_requests(struct intel_gt *gt)
> >   {
> > -	schedule_delayed_work(&gt->requests.retire_work,
> > -			      round_jiffies_up_relative(HZ));
> > +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
> > +			   round_jiffies_up_relative(HZ));
> >   }
> >   
> >   void intel_gt_fini_requests(struct intel_gt *gt)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> > index 195ff72d7a14..e2152f75ba2e 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> > @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w,
> >   	w->name = name;
> >   
> >   	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
> > -	schedule_delayed_work(&w->work, timeout);
> > +	queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout);
> >   }
> >   
> >   void __intel_fini_wedge(struct intel_wedge_me *w)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
> > index e68a99205599..e92e626d4994 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> > @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
> >   static void rps_timer(struct timer_list *t)
> >   {
> >   	struct intel_rps *rps = from_timer(rps, t, timer);
> > +	struct intel_gt *gt = rps_to_gt(rps);
> >   	struct intel_engine_cs *engine;
> >   	ktime_t dt, last, timestamp;
> >   	enum intel_engine_id id;
> >   	s64 max_busy[3] = {};
> >   
> >   	timestamp = 0;
> > -	for_each_engine(engine, rps_to_gt(rps), id) {
> > +	for_each_engine(engine, gt, id) {
> >   		s64 busy;
> >   		int i;
> >   
> > @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t)
> >   
> >   			busy += div_u64(max_busy[i], 1 << i);
> >   		}
> > -		GT_TRACE(rps_to_gt(rps),
> > +		GT_TRACE(gt,
> >   			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
> >   			 busy, (int)div64_u64(100 * busy, dt),
> >   			 max_busy[0], max_busy[1], max_busy[2],
> > @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t)
> >   		    rps->cur_freq < rps->max_freq_softlimit) {
> >   			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
> >   			rps->pm_interval = 1;
> > -			schedule_work(&rps->work);
> > +			queue_work(gt->i915->unordered_wq, &rps->work);
> >   		} else if (100 * busy < rps->power.down_threshold * dt &&
> >   			   rps->cur_freq > rps->min_freq_softlimit) {
> >   			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
> >   			rps->pm_interval = 1;
> > -			schedule_work(&rps->work);
> > +			queue_work(gt->i915->unordered_wq, &rps->work);
> >   		} else {
> >   			rps->last_adj = 0;
> >   		}
> > @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
> >   	}
> >   	mutex_unlock(&rps->lock);
> >   	if (boost)
> > -		schedule_work(&rps->work);
> > +		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
> >   
> >   	return 0;
> >   }
> > @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq)
> >   			if (!atomic_fetch_inc(&slpc->num_waiters)) {
> >   				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
> >   					 rq->fence.context, rq->fence.seqno);
> > -				schedule_work(&slpc->boost_work);
> > +				queue_work(rps_to_gt(rps)->i915->unordered_wq,
> > +					   &slpc->boost_work);
> >   			}
> >   
> >   			return;
> > @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq)
> >   			 rq->fence.context, rq->fence.seqno);
> >   
> >   		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> > -			schedule_work(&rps->work);
> > +			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
> >   
> >   		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
> >   	}
> > @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
> >   	gen6_gt_pm_mask_irq(gt, events);
> >   
> >   	rps->pm_iir |= events;
> > -	schedule_work(&rps->work);
> > +	queue_work(gt->i915->unordered_wq, &rps->work);
> >   }
> >   
> >   void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
> > @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
> >   		gen6_gt_pm_mask_irq(gt, events);
> >   		rps->pm_iir |= events;
> >   
> > -		schedule_work(&rps->work);
> > +		queue_work(gt->i915->unordered_wq, &rps->work);
> >   		spin_unlock(gt->irq_lock);
> >   	}
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> > index 542ce6d2de19..78cdfc6f315f 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> > @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt)
> >   
> >   	/* Boost gpufreq to max [waitboost] and keep it fixed */
> >   	atomic_inc(&gt->rps.num_waiters);
> > -	schedule_work(&gt->rps.work);
> > +	queue_work(gt->i915->unordered_wq, &gt->rps.work);
> >   	flush_work(&gt->rps.work);
> >   }
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> > index 522733a89946..88808aa85b26 100644
> > --- a/drivers/gpu/drm/i915/i915_driver.c
> > +++ b/drivers/gpu/drm/i915/i915_driver.c
> > @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
> >   	if (dev_priv->display.hotplug.dp_wq == NULL)
> >   		goto out_free_wq;
> >   
> > +	/*
> > +	 * The unordered i915 workqueue should be used for all work
> > +	 * scheduling that do not require running in order.
> > +	 */
> 
> Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know 
> when to use each that well so not fair on you to figure it out.

:D

To be honest, I looked at all the existing users and described pretty
much what is happening.  As far as I could tell, someone started using
the system queue a long time ago and, in lack of better alternatives,
everyone else who needed a work followed that.  So the new queue is
indeed a catch all for non-ordered queues at the moment (in pretty much
the same way as the system queue was before)...

Additionally, I don't know all the current users well, since they are
everywhere, and I don't think there has been a specific rule when to
use the system queue before.

I'm totally open for better descriptions. :)


> > +	dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0);
> > +	if (dev_priv->unordered_wq == NULL)
> > +		goto out_free_dp_wq;
> > +
> >   	return 0;
> >   
> > +out_free_dp_wq:
> > +	destroy_workqueue(dev_priv->unordered_wq);
> 
> Wrong wq.

Ouch.  Good catch.


> >   out_free_wq:
> >   	destroy_workqueue(dev_priv->wq);
> >   out_err:
> > @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
> >   
> >   static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
> >   {
> > +	destroy_workqueue(dev_priv->unordered_wq);
> >   	destroy_workqueue(dev_priv->display.hotplug.dp_wq);
> >   	destroy_workqueue(dev_priv->wq);
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 14c5338c96a6..8f2665e8afb5 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -259,6 +259,14 @@ struct drm_i915_private {
> >   	 */
> >   	struct workqueue_struct *wq;
> >   
> > +	/**
> > +	 * unordered_wq - internal workqueue for unordered work
> > +	 *
> > +	 * This workqueue should be used for all unordered work
> > +	 * scheduling within i915.
> 
> Proably add something like ", which used to be scheduled on the 
> system_wq before moving to a driver instance due deprecation of
> flush_scheduled_work()."

I did start writing something like that, but then I thought that, in
the code, writing about the history of the implementation doesn't add
much.  We have the git log for that.  But, obviously, I can add that if
you want.


> That way we leave some note to the reader.
> 
> > +	 */
> > +	struct workqueue_struct *unordered_wq;
> > +
> >   	/* pm private clock gating functions */
> >   	const struct drm_i915_clock_gating_funcs *clock_gating_funcs;
> >   
> > @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
> >   
> >   #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
> >   				       GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> > -
> 
> Tidy if you can please.

Oops!


> >   #endif
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 630a732aaecc..894068bb37b6 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
> >   
> >   	if (!i915_request_completed(rq)) {
> >   		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
> > -			schedule_work(&gt->watchdog.work);
> > +			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
> >   	} else {
> >   		i915_request_put(rq);
> >   	}
> > diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> > index 40aafe676017..497ea21a347e 100644
> > --- a/drivers/gpu/drm/i915/intel_wakeref.c
> > +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> > @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
> >   
> >   	/* Assume we are not in process context and so cannot sleep. */
> >   	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
> > -		mod_delayed_work(system_wq, &wf->work,
> > +		mod_delayed_work(wf->i915->wq, &wf->work,
> >   				 FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags));
> >   		return;
> >   	}
> 
> Pending the one or two details the non-display parts look good to me.

Great, thanks! I'll send a new version addressing your comments and
wait for display parts review.

--
Cheers,
Luca.
Jani Nikula May 26, 2023, 11:30 a.m. UTC | #4
On Wed, 24 May 2023, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote:
> On 24/05/2023 10:05, Luca Coelho wrote:
>> In order to avoid flush_scheduled_work() usage, add a dedicated
>> workqueue in the drm_i915_private structure.  In this way, we don't
>> need to use the system queue anymore.
>> 
>> This change is mostly mechanical and based on Tetsuo's original
>> patch[1].
>> 
>> Link: https://patchwork.freedesktop.org/series/114608/ [1]
>> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Jani Nikula <jani.nikula@intel.com>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
>> ---
>>   drivers/gpu/drm/i915/display/intel_display.c  |  5 ++--
>>   .../drm/i915/display/intel_display_driver.c   |  2 +-
>>   drivers/gpu/drm/i915/display/intel_dmc.c      |  2 +-
>>   drivers/gpu/drm/i915/display/intel_dp.c       |  2 +-
>>   .../drm/i915/display/intel_dp_link_training.c |  3 ++-
>>   drivers/gpu/drm/i915/display/intel_drrs.c     |  4 +++-
>>   drivers/gpu/drm/i915/display/intel_fbc.c      |  2 +-
>>   drivers/gpu/drm/i915/display/intel_fbdev.c    |  3 ++-
>>   drivers/gpu/drm/i915/display/intel_hdcp.c     | 23 +++++++++++--------
>>   drivers/gpu/drm/i915/display/intel_hotplug.c  | 18 ++++++++++-----
>>   drivers/gpu/drm/i915/display/intel_opregion.c |  3 ++-
>>   drivers/gpu/drm/i915/display/intel_pps.c      |  4 +++-
>>   drivers/gpu/drm/i915/display/intel_psr.c      |  8 ++++---
>>   .../drm/i915/gt/intel_execlists_submission.c  |  5 ++--
>>   .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 10 ++++----
>>   drivers/gpu/drm/i915/gt/intel_gt_irq.c        |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 10 ++++----
>>   drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_rps.c           | 20 ++++++++--------
>>   drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  2 +-
>>   drivers/gpu/drm/i915/i915_driver.c            | 11 +++++++++
>>   drivers/gpu/drm/i915/i915_drv.h               |  9 +++++++-
>>   drivers/gpu/drm/i915/i915_request.c           |  2 +-
>>   drivers/gpu/drm/i915/intel_wakeref.c          |  2 +-
>>   24 files changed, 99 insertions(+), 55 deletions(-)
>
> I'll take a look at the gt/ parts only since display experts need to 
> okay the point Daniel raise anyway.

There's a bunch of lockdep failures in BAT. Are they possible related to
switching to unordered wq?

BR,
Jani


>
> 8<
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 750326434677..2ebd937f3b4c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -2327,6 +2327,7 @@ static u32 active_ccid(struct intel_engine_cs *engine)
>>   
>>   static void execlists_capture(struct intel_engine_cs *engine)
>>   {
>> +	struct drm_i915_private *i915 = engine->i915;
>>   	struct execlists_capture *cap;
>>   
>>   	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
>> @@ -2375,7 +2376,7 @@ static void execlists_capture(struct intel_engine_cs *engine)
>>   		goto err_rq;
>>   
>>   	INIT_WORK(&cap->work, execlists_capture_work);
>> -	schedule_work(&cap->work);
>> +	queue_work(i915->unordered_wq, &cap->work);
>>   	return;
>>   
>>   err_rq:
>> @@ -3680,7 +3681,7 @@ static void virtual_context_destroy(struct kref *kref)
>>   	 * lock, we can delegate the free of the engine to an RCU worker.
>>   	 */
>>   	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
>> -	queue_rcu_work(system_wq, &ve->rcu);
>> +	queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu);
>>   }
>>   
>>   static void virtual_engine_initial_hint(struct virtual_engine *ve)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
>> index cadfd85785b1..86b5a9ba323d 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
>> @@ -88,10 +88,11 @@ static void pool_free_work(struct work_struct *wrk)
>>   {
>>   	struct intel_gt_buffer_pool *pool =
>>   		container_of(wrk, typeof(*pool), work.work);
>> +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
>
> Okay. Or alternatively, pool = &gt->buffer_pool, might read simpler...
>
>>   
>>   	if (pool_free_older_than(pool, HZ))
>> -		schedule_delayed_work(&pool->work,
>> -				      round_jiffies_up_relative(HZ));
>> +		queue_delayed_work(gt->i915->unordered_wq, &pool->work,
>> +				   round_jiffies_up_relative(HZ));
>>   }
>>   
>>   static void pool_retire(struct i915_active *ref)
>> @@ -99,6 +100,7 @@ static void pool_retire(struct i915_active *ref)
>>   	struct intel_gt_buffer_pool_node *node =
>>   		container_of(ref, typeof(*node), active);
>>   	struct intel_gt_buffer_pool *pool = node->pool;
>> +	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
>
> ... although I am beginning to wonder if intel_gt_buffer_pool shouldn't 
> just gain a gt backpointer? That would decouple things more instead of 
> tying the implementation with intel_gt implicitly. Not a strong 
> direction though.
>
>>   	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
>>   	unsigned long flags;
>>   
>> @@ -116,8 +118,8 @@ static void pool_retire(struct i915_active *ref)
>>   	WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
>>   	spin_unlock_irqrestore(&pool->lock, flags);
>>   
>> -	schedule_delayed_work(&pool->work,
>> -			      round_jiffies_up_relative(HZ));
>> +	queue_delayed_work(gt->i915->unordered_wq, &pool->work,
>> +			   round_jiffies_up_relative(HZ));
>>   }
>>   
>>   void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
>> index 8f888d36f16d..62fd00c9e519 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
>> @@ -376,7 +376,7 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
>>   	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
>>   		gt->i915->l3_parity.which_slice |= 1 << 0;
>>   
>> -	schedule_work(&gt->i915->l3_parity.error_work);
>> +	queue_work(gt->i915->unordered_wq, &gt->i915->l3_parity.error_work);
>>   }
>>   
>>   void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> index 1dfd01668c79..d1a382dfaa1d 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> @@ -116,7 +116,7 @@ void intel_engine_add_retire(struct intel_engine_cs *engine,
>>   	GEM_BUG_ON(intel_engine_is_virtual(engine));
>>   
>>   	if (add_retire(engine, tl))
>> -		schedule_work(&engine->retire_work);
>> +		queue_work(engine->i915->unordered_wq, &engine->retire_work);
>>   }
>>   
>>   void intel_engine_init_retire(struct intel_engine_cs *engine)
>> @@ -207,8 +207,8 @@ static void retire_work_handler(struct work_struct *work)
>>   	struct intel_gt *gt =
>>   		container_of(work, typeof(*gt), requests.retire_work.work);
>>   
>> -	schedule_delayed_work(&gt->requests.retire_work,
>> -			      round_jiffies_up_relative(HZ));
>> +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
>> +			   round_jiffies_up_relative(HZ));
>>   	intel_gt_retire_requests(gt);
>>   }
>>   
>> @@ -224,8 +224,8 @@ void intel_gt_park_requests(struct intel_gt *gt)
>>   
>>   void intel_gt_unpark_requests(struct intel_gt *gt)
>>   {
>> -	schedule_delayed_work(&gt->requests.retire_work,
>> -			      round_jiffies_up_relative(HZ));
>> +	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
>> +			   round_jiffies_up_relative(HZ));
>>   }
>>   
>>   void intel_gt_fini_requests(struct intel_gt *gt)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
>> index 195ff72d7a14..e2152f75ba2e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
>> @@ -1625,7 +1625,7 @@ void __intel_init_wedge(struct intel_wedge_me *w,
>>   	w->name = name;
>>   
>>   	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
>> -	schedule_delayed_work(&w->work, timeout);
>> +	queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout);
>>   }
>>   
>>   void __intel_fini_wedge(struct intel_wedge_me *w)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
>> index e68a99205599..e92e626d4994 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
>> @@ -73,13 +73,14 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
>>   static void rps_timer(struct timer_list *t)
>>   {
>>   	struct intel_rps *rps = from_timer(rps, t, timer);
>> +	struct intel_gt *gt = rps_to_gt(rps);
>>   	struct intel_engine_cs *engine;
>>   	ktime_t dt, last, timestamp;
>>   	enum intel_engine_id id;
>>   	s64 max_busy[3] = {};
>>   
>>   	timestamp = 0;
>> -	for_each_engine(engine, rps_to_gt(rps), id) {
>> +	for_each_engine(engine, gt, id) {
>>   		s64 busy;
>>   		int i;
>>   
>> @@ -123,7 +124,7 @@ static void rps_timer(struct timer_list *t)
>>   
>>   			busy += div_u64(max_busy[i], 1 << i);
>>   		}
>> -		GT_TRACE(rps_to_gt(rps),
>> +		GT_TRACE(gt,
>>   			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
>>   			 busy, (int)div64_u64(100 * busy, dt),
>>   			 max_busy[0], max_busy[1], max_busy[2],
>> @@ -133,12 +134,12 @@ static void rps_timer(struct timer_list *t)
>>   		    rps->cur_freq < rps->max_freq_softlimit) {
>>   			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
>>   			rps->pm_interval = 1;
>> -			schedule_work(&rps->work);
>> +			queue_work(gt->i915->unordered_wq, &rps->work);
>>   		} else if (100 * busy < rps->power.down_threshold * dt &&
>>   			   rps->cur_freq > rps->min_freq_softlimit) {
>>   			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
>>   			rps->pm_interval = 1;
>> -			schedule_work(&rps->work);
>> +			queue_work(gt->i915->unordered_wq, &rps->work);
>>   		} else {
>>   			rps->last_adj = 0;
>>   		}
>> @@ -973,7 +974,7 @@ static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
>>   	}
>>   	mutex_unlock(&rps->lock);
>>   	if (boost)
>> -		schedule_work(&rps->work);
>> +		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
>>   
>>   	return 0;
>>   }
>> @@ -1025,7 +1026,8 @@ void intel_rps_boost(struct i915_request *rq)
>>   			if (!atomic_fetch_inc(&slpc->num_waiters)) {
>>   				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
>>   					 rq->fence.context, rq->fence.seqno);
>> -				schedule_work(&slpc->boost_work);
>> +				queue_work(rps_to_gt(rps)->i915->unordered_wq,
>> +					   &slpc->boost_work);
>>   			}
>>   
>>   			return;
>> @@ -1041,7 +1043,7 @@ void intel_rps_boost(struct i915_request *rq)
>>   			 rq->fence.context, rq->fence.seqno);
>>   
>>   		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
>> -			schedule_work(&rps->work);
>> +			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
>>   
>>   		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
>>   	}
>> @@ -1900,7 +1902,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>>   	gen6_gt_pm_mask_irq(gt, events);
>>   
>>   	rps->pm_iir |= events;
>> -	schedule_work(&rps->work);
>> +	queue_work(gt->i915->unordered_wq, &rps->work);
>>   }
>>   
>>   void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>> @@ -1917,7 +1919,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
>>   		gen6_gt_pm_mask_irq(gt, events);
>>   		rps->pm_iir |= events;
>>   
>> -		schedule_work(&rps->work);
>> +		queue_work(gt->i915->unordered_wq, &rps->work);
>>   		spin_unlock(gt->irq_lock);
>>   	}
>>   
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
>> index 542ce6d2de19..78cdfc6f315f 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
>> @@ -27,7 +27,7 @@ static void perf_begin(struct intel_gt *gt)
>>   
>>   	/* Boost gpufreq to max [waitboost] and keep it fixed */
>>   	atomic_inc(&gt->rps.num_waiters);
>> -	schedule_work(&gt->rps.work);
>> +	queue_work(gt->i915->unordered_wq, &gt->rps.work);
>>   	flush_work(&gt->rps.work);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
>> index 522733a89946..88808aa85b26 100644
>> --- a/drivers/gpu/drm/i915/i915_driver.c
>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>> @@ -132,8 +132,18 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
>>   	if (dev_priv->display.hotplug.dp_wq == NULL)
>>   		goto out_free_wq;
>>   
>> +	/*
>> +	 * The unordered i915 workqueue should be used for all work
>> +	 * scheduling that do not require running in order.
>> +	 */
>
> Ha-ha. Nice cop out. ;) But okay, now that we have two we don't know 
> when to use each that well so not fair on you to figure it out.
>
>> +	dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0);
>> +	if (dev_priv->unordered_wq == NULL)
>> +		goto out_free_dp_wq;
>> +
>>   	return 0;
>>   
>> +out_free_dp_wq:
>> +	destroy_workqueue(dev_priv->unordered_wq);
>
> Wrong wq.
>
>>   out_free_wq:
>>   	destroy_workqueue(dev_priv->wq);
>>   out_err:
>> @@ -144,6 +154,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
>>   
>>   static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
>>   {
>> +	destroy_workqueue(dev_priv->unordered_wq);
>>   	destroy_workqueue(dev_priv->display.hotplug.dp_wq);
>>   	destroy_workqueue(dev_priv->wq);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 14c5338c96a6..8f2665e8afb5 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -259,6 +259,14 @@ struct drm_i915_private {
>>   	 */
>>   	struct workqueue_struct *wq;
>>   
>> +	/**
>> +	 * unordered_wq - internal workqueue for unordered work
>> +	 *
>> +	 * This workqueue should be used for all unordered work
>> +	 * scheduling within i915.
>
> Proably add something like ", which used to be scheduled on the 
> system_wq before moving to a driver instance due deprecation of
> flush_scheduled_work()."
>
> That way we leave some note to the reader.
>
>> +	 */
>> +	struct workqueue_struct *unordered_wq;
>> +
>>   	/* pm private clock gating functions */
>>   	const struct drm_i915_clock_gating_funcs *clock_gating_funcs;
>>   
>> @@ -930,5 +938,4 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>>   
>>   #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
>>   				       GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> -
>
> Tidy if you can please.
>
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index 630a732aaecc..894068bb37b6 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -290,7 +290,7 @@ static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
>>   
>>   	if (!i915_request_completed(rq)) {
>>   		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
>> -			schedule_work(&gt->watchdog.work);
>> +			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
>>   	} else {
>>   		i915_request_put(rq);
>>   	}
>> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
>> index 40aafe676017..497ea21a347e 100644
>> --- a/drivers/gpu/drm/i915/intel_wakeref.c
>> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
>> @@ -75,7 +75,7 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
>>   
>>   	/* Assume we are not in process context and so cannot sleep. */
>>   	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
>> -		mod_delayed_work(system_wq, &wf->work,
>> +		mod_delayed_work(wf->i915->wq, &wf->work,
>>   				 FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags));
>>   		return;
>>   	}
>
> Pending the one or two details the non-display parts look good to me.
>
> Regards,
>
> Tvrtko
Luca Coelho May 28, 2023, 2:58 p.m. UTC | #5
On Fri, 2023-05-26 at 14:30 +0300, Jani Nikula wrote:
> On Wed, 24 May 2023, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote:
> > On 24/05/2023 10:05, Luca Coelho wrote:
> > > In order to avoid flush_scheduled_work() usage, add a dedicated
> > > workqueue in the drm_i915_private structure.  In this way, we don't
> > > need to use the system queue anymore.
> > > 
> > > This change is mostly mechanical and based on Tetsuo's original
> > > patch[1].
> > > 
> > > Link: https://patchwork.freedesktop.org/series/114608/ [1]
> > > Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/display/intel_display.c  |  5 ++--
> > >   .../drm/i915/display/intel_display_driver.c   |  2 +-
> > >   drivers/gpu/drm/i915/display/intel_dmc.c      |  2 +-
> > >   drivers/gpu/drm/i915/display/intel_dp.c       |  2 +-
> > >   .../drm/i915/display/intel_dp_link_training.c |  3 ++-
> > >   drivers/gpu/drm/i915/display/intel_drrs.c     |  4 +++-
> > >   drivers/gpu/drm/i915/display/intel_fbc.c      |  2 +-
> > >   drivers/gpu/drm/i915/display/intel_fbdev.c    |  3 ++-
> > >   drivers/gpu/drm/i915/display/intel_hdcp.c     | 23 +++++++++++--------
> > >   drivers/gpu/drm/i915/display/intel_hotplug.c  | 18 ++++++++++-----
> > >   drivers/gpu/drm/i915/display/intel_opregion.c |  3 ++-
> > >   drivers/gpu/drm/i915/display/intel_pps.c      |  4 +++-
> > >   drivers/gpu/drm/i915/display/intel_psr.c      |  8 ++++---
> > >   .../drm/i915/gt/intel_execlists_submission.c  |  5 ++--
> > >   .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 10 ++++----
> > >   drivers/gpu/drm/i915/gt/intel_gt_irq.c        |  2 +-
> > >   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 10 ++++----
> > >   drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +-
> > >   drivers/gpu/drm/i915/gt/intel_rps.c           | 20 ++++++++--------
> > >   drivers/gpu/drm/i915/gt/selftest_engine_cs.c  |  2 +-
> > >   drivers/gpu/drm/i915/i915_driver.c            | 11 +++++++++
> > >   drivers/gpu/drm/i915/i915_drv.h               |  9 +++++++-
> > >   drivers/gpu/drm/i915/i915_request.c           |  2 +-
> > >   drivers/gpu/drm/i915/intel_wakeref.c          |  2 +-
> > >   24 files changed, 99 insertions(+), 55 deletions(-)
> > 
> > I'll take a look at the gt/ parts only since display experts need to 
> > okay the point Daniel raise anyway.
> 
> There's a bunch of lockdep failures in BAT. Are they possible related to
> switching to unordered wq?

I have not checked those results yet, because I'll send a new version
soon anyway.  I tested this before, but with the dedicated wakeref
workqueues, and it all passed cleanly.  So it _may_ be related to that
change.  I'll check it.

--
Cheers,
Luca.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 0490c6412ab5..f52b393382df 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7155,11 +7155,12 @@  intel_atomic_commit_ready(struct i915_sw_fence *fence,
 		break;
 	case FENCE_FREE:
 		{
+			struct drm_i915_private *i915 = to_i915(state->base.dev);
 			struct intel_atomic_helper *helper =
-				&to_i915(state->base.dev)->display.atomic_helper;
+				&i915->display.atomic_helper;
 
 			if (llist_add(&state->freed, &helper->free_list))
-				schedule_work(&helper->free_work);
+				queue_work(i915->unordered_wq, &helper->free_work);
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 60ce10fc7205..a9e36ddb6c1d 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -435,7 +435,7 @@  void intel_display_driver_remove_noirq(struct drm_i915_private *i915)
 	intel_unregister_dsm_handler();
 
 	/* flush any delayed tasks or pending work */
-	flush_scheduled_work();
+	flush_workqueue(i915->unordered_wq);
 
 	intel_hdcp_component_fini(i915);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 8a88de67ff0a..5f479f3828bb 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -1057,7 +1057,7 @@  void intel_dmc_init(struct drm_i915_private *i915)
 	i915->display.dmc.dmc = dmc;
 
 	drm_dbg_kms(&i915->drm, "Loading %s\n", dmc->fw_path);
-	schedule_work(&dmc->work);
+	queue_work(i915->unordered_wq, &dmc->work);
 
 	return;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 4bec8cd7979f..1466283cd569 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5251,7 +5251,7 @@  static void intel_dp_oob_hotplug_event(struct drm_connector *connector)
 	spin_lock_irq(&i915->irq_lock);
 	i915->display.hotplug.event_bits |= BIT(encoder->hpd_pin);
 	spin_unlock_irq(&i915->irq_lock);
-	queue_delayed_work(system_wq, &i915->display.hotplug.hotplug_work, 0);
+	queue_delayed_work(i915->unordered_wq, &i915->display.hotplug.hotplug_work, 0);
 }
 
 static const struct drm_connector_funcs intel_dp_connector_funcs = {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 0952a707358c..2cccc0a970f1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -1064,6 +1064,7 @@  static void intel_dp_schedule_fallback_link_training(struct intel_dp *intel_dp,
 						     const struct intel_crtc_state *crtc_state)
 {
 	struct intel_connector *intel_connector = intel_dp->attached_connector;
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
 	if (!intel_digital_port_connected(&dp_to_dig_port(intel_dp)->base)) {
 		lt_dbg(intel_dp, DP_PHY_DPRX, "Link Training failed on disconnected sink.\n");
@@ -1081,7 +1082,7 @@  static void intel_dp_schedule_fallback_link_training(struct intel_dp *intel_dp,
 	}
 
 	/* Schedule a Hotplug Uevent to userspace to start modeset */
-	schedule_work(&intel_connector->modeset_retry_work);
+	queue_work(i915->unordered_wq, &intel_connector->modeset_retry_work);
 }
 
 /* Perform the link training on all LTTPRs and the DPRX on a link. */
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
index 760e63cdc0c8..0d35b6be5b6a 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.c
+++ b/drivers/gpu/drm/i915/display/intel_drrs.c
@@ -111,7 +111,9 @@  static void intel_drrs_set_state(struct intel_crtc *crtc,
 
 static void intel_drrs_schedule_work(struct intel_crtc *crtc)
 {
-	mod_delayed_work(system_wq, &crtc->drrs.work, msecs_to_jiffies(1000));
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	mod_delayed_work(i915->unordered_wq, &crtc->drrs.work, msecs_to_jiffies(1000));
 }
 
 static unsigned int intel_drrs_frontbuffer_bits(const struct intel_crtc_state *crtc_state)
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 11bb8cf9c9d0..3ae284da78df 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1600,7 +1600,7 @@  static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc)
 	if (READ_ONCE(fbc->underrun_detected))
 		return;
 
-	schedule_work(&fbc->underrun_work);
+	queue_work(fbc->i915->unordered_wq, &fbc->underrun_work);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index aab1ae74a8f7..c88f13d319b9 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -689,7 +689,8 @@  void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 			/* Don't block our own workqueue as this can
 			 * be run in parallel with other i915.ko tasks.
 			 */
-			schedule_work(&dev_priv->display.fbdev.suspend_work);
+			queue_work(dev_priv->unordered_wq,
+				   &dev_priv->display.fbdev.suspend_work);
 			return;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index dd539106ee5a..aa74b7b7b266 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -983,6 +983,7 @@  static void intel_hdcp_update_value(struct intel_connector *connector,
 	struct drm_device *dev = connector->base.dev;
 	struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
 	struct intel_hdcp *hdcp = &connector->hdcp;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 
 	drm_WARN_ON(connector->base.dev, !mutex_is_locked(&hdcp->mutex));
 
@@ -1001,7 +1002,7 @@  static void intel_hdcp_update_value(struct intel_connector *connector,
 	hdcp->value = value;
 	if (update_property) {
 		drm_connector_get(&connector->base);
-		schedule_work(&hdcp->prop_work);
+		queue_work(i915->unordered_wq, &hdcp->prop_work);
 	}
 }
 
@@ -2090,16 +2091,17 @@  static void intel_hdcp_check_work(struct work_struct *work)
 					       struct intel_hdcp,
 					       check_work);
 	struct intel_connector *connector = intel_hdcp_to_connector(hdcp);
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 
 	if (drm_connector_is_unregistered(&connector->base))
 		return;
 
 	if (!intel_hdcp2_check_link(connector))
-		schedule_delayed_work(&hdcp->check_work,
-				      DRM_HDCP2_CHECK_PERIOD_MS);
+		queue_delayed_work(i915->unordered_wq, &hdcp->check_work,
+				   DRM_HDCP2_CHECK_PERIOD_MS);
 	else if (!intel_hdcp_check_link(connector))
-		schedule_delayed_work(&hdcp->check_work,
-				      DRM_HDCP_CHECK_PERIOD_MS);
+		queue_delayed_work(i915->unordered_wq, &hdcp->check_work,
+				   DRM_HDCP_CHECK_PERIOD_MS);
 }
 
 static int i915_hdcp_component_bind(struct device *i915_kdev,
@@ -2398,7 +2400,8 @@  int intel_hdcp_enable(struct intel_atomic_state *state,
 	}
 
 	if (!ret) {
-		schedule_delayed_work(&hdcp->check_work, check_link_interval);
+		queue_delayed_work(dev_priv->unordered_wq, &hdcp->check_work,
+				   check_link_interval);
 		intel_hdcp_update_value(connector,
 					DRM_MODE_CONTENT_PROTECTION_ENABLED,
 					true);
@@ -2447,6 +2450,7 @@  void intel_hdcp_update_pipe(struct intel_atomic_state *state,
 				to_intel_connector(conn_state->connector);
 	struct intel_hdcp *hdcp = &connector->hdcp;
 	bool content_protection_type_changed, desired_and_not_enabled = false;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 
 	if (!connector->hdcp.shim)
 		return;
@@ -2473,7 +2477,7 @@  void intel_hdcp_update_pipe(struct intel_atomic_state *state,
 		mutex_lock(&hdcp->mutex);
 		hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED;
 		drm_connector_get(&connector->base);
-		schedule_work(&hdcp->prop_work);
+		queue_work(i915->unordered_wq, &hdcp->prop_work);
 		mutex_unlock(&hdcp->mutex);
 	}
 
@@ -2490,7 +2494,7 @@  void intel_hdcp_update_pipe(struct intel_atomic_state *state,
 		 */
 		if (!desired_and_not_enabled && !content_protection_type_changed) {
 			drm_connector_get(&connector->base);
-			schedule_work(&hdcp->prop_work);
+			queue_work(i915->unordered_wq, &hdcp->prop_work);
 		}
 	}
 
@@ -2602,6 +2606,7 @@  void intel_hdcp_atomic_check(struct drm_connector *connector,
 void intel_hdcp_handle_cp_irq(struct intel_connector *connector)
 {
 	struct intel_hdcp *hdcp = &connector->hdcp;
+	struct drm_i915_private *i915 = to_i915(connector->base.dev);
 
 	if (!hdcp->shim)
 		return;
@@ -2609,5 +2614,5 @@  void intel_hdcp_handle_cp_irq(struct intel_connector *connector)
 	atomic_inc(&connector->hdcp.cp_irq_count);
 	wake_up_all(&connector->hdcp.cp_irq_queue);
 
-	schedule_delayed_work(&hdcp->check_work, 0);
+	queue_delayed_work(i915->unordered_wq, &hdcp->check_work, 0);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 23a5e1a875f1..1160fa20433b 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -212,7 +212,8 @@  intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
 	/* Enable polling and queue hotplug re-enabling. */
 	if (hpd_disabled) {
 		drm_kms_helper_poll_enable(&dev_priv->drm);
-		mod_delayed_work(system_wq, &dev_priv->display.hotplug.reenable_work,
+		mod_delayed_work(dev_priv->unordered_wq,
+				 &dev_priv->display.hotplug.reenable_work,
 				 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
 	}
 }
@@ -339,7 +340,8 @@  static void i915_digport_work_func(struct work_struct *work)
 		spin_lock_irq(&dev_priv->irq_lock);
 		dev_priv->display.hotplug.event_bits |= old_bits;
 		spin_unlock_irq(&dev_priv->irq_lock);
-		queue_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work, 0);
+		queue_delayed_work(dev_priv->unordered_wq,
+				   &dev_priv->display.hotplug.hotplug_work, 0);
 	}
 }
 
@@ -446,7 +448,8 @@  static void i915_hotplug_work_func(struct work_struct *work)
 		dev_priv->display.hotplug.retry_bits |= retry;
 		spin_unlock_irq(&dev_priv->irq_lock);
 
-		mod_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work,
+		mod_delayed_work(dev_priv->unordered_wq,
+				 &dev_priv->display.hotplug.hotplug_work,
 				 msecs_to_jiffies(HPD_RETRY_DELAY));
 	}
 }
@@ -577,7 +580,8 @@  void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 	if (queue_dig)
 		queue_work(dev_priv->display.hotplug.dp_wq, &dev_priv->display.hotplug.dig_port_work);
 	if (queue_hp)
-		queue_delayed_work(system_wq, &dev_priv->display.hotplug.hotplug_work, 0);
+		queue_delayed_work(dev_priv->unordered_wq,
+				   &dev_priv->display.hotplug.hotplug_work, 0);
 }
 
 /**
@@ -687,7 +691,8 @@  void intel_hpd_poll_enable(struct drm_i915_private *dev_priv)
 	 * As well, there's no issue if we race here since we always reschedule
 	 * this worker anyway
 	 */
-	schedule_work(&dev_priv->display.hotplug.poll_init_work);
+	queue_work(dev_priv->unordered_wq,
+		   &dev_priv->display.hotplug.poll_init_work);
 }
 
 /**
@@ -715,7 +720,8 @@  void intel_hpd_poll_disable(struct drm_i915_private *dev_priv)
 		return;
 
 	WRITE_ONCE(dev_priv->display.hotplug.poll_enabled, false);
-	schedule_work(&dev_priv->display.hotplug.poll_init_work);
+	queue_work(dev_priv->unordered_wq,
+		   &dev_priv->display.hotplug.poll_init_work);
 }
 
 void intel_hpd_init_early(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index b7973a05d022..84078fb82b2f 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -635,7 +635,8 @@  static void asle_work(struct work_struct *work)
 void intel_opregion_asle_intr(struct drm_i915_private *dev_priv)
 {
 	if (dev_priv->display.opregion.asle)
-		schedule_work(&dev_priv->display.opregion.asle_work);
+		queue_work(dev_priv->unordered_wq,
+			   &dev_priv->display.opregion.asle_work);
 }
 
 #define ACPI_EV_DISPLAY_SWITCH (1<<0)
diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c
index 5e7ba594e7e7..73f0f1714b37 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.c
+++ b/drivers/gpu/drm/i915/display/intel_pps.c
@@ -867,6 +867,7 @@  static void edp_panel_vdd_work(struct work_struct *__work)
 
 static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp)
 {
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	unsigned long delay;
 
 	/*
@@ -882,7 +883,8 @@  static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp)
 	 * operations.
 	 */
 	delay = msecs_to_jiffies(intel_dp->pps.panel_power_cycle_delay * 5);
-	schedule_delayed_work(&intel_dp->pps.panel_vdd_work, delay);
+	queue_delayed_work(i915->unordered_wq,
+			   &intel_dp->pps.panel_vdd_work, delay);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index ea0389c5f656..d58ed9b62e67 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -341,7 +341,7 @@  void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir)
 		 */
 		intel_de_rmw(dev_priv, imr_reg, 0, psr_irq_psr_error_bit_get(intel_dp));
 
-		schedule_work(&intel_dp->psr.work);
+		queue_work(dev_priv->unordered_wq, &intel_dp->psr.work);
 	}
 }
 
@@ -2440,6 +2440,8 @@  static void
 tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits,
 		       enum fb_op_origin origin)
 {
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
 	if (!intel_dp->psr.dc3co_exitline || !intel_dp->psr.psr2_enabled ||
 	    !intel_dp->psr.active)
 		return;
@@ -2453,7 +2455,7 @@  tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits,
 		return;
 
 	tgl_psr2_enable_dc3co(intel_dp);
-	mod_delayed_work(system_wq, &intel_dp->psr.dc3co_work,
+	mod_delayed_work(i915->unordered_wq, &intel_dp->psr.dc3co_work,
 			 intel_dp->psr.dc3co_exit_delay);
 }
 
@@ -2493,7 +2495,7 @@  static void _psr_flush_handle(struct intel_dp *intel_dp)
 		psr_force_hw_tracking_exit(intel_dp);
 
 		if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits)
-			schedule_work(&intel_dp->psr.work);
+			queue_work(dev_priv->unordered_wq, &intel_dp->psr.work);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 750326434677..2ebd937f3b4c 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2327,6 +2327,7 @@  static u32 active_ccid(struct intel_engine_cs *engine)
 
 static void execlists_capture(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
 	struct execlists_capture *cap;
 
 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
@@ -2375,7 +2376,7 @@  static void execlists_capture(struct intel_engine_cs *engine)
 		goto err_rq;
 
 	INIT_WORK(&cap->work, execlists_capture_work);
-	schedule_work(&cap->work);
+	queue_work(i915->unordered_wq, &cap->work);
 	return;
 
 err_rq:
@@ -3680,7 +3681,7 @@  static void virtual_context_destroy(struct kref *kref)
 	 * lock, we can delegate the free of the engine to an RCU worker.
 	 */
 	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
-	queue_rcu_work(system_wq, &ve->rcu);
+	queue_rcu_work(ve->context.engine->i915->unordered_wq, &ve->rcu);
 }
 
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index cadfd85785b1..86b5a9ba323d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -88,10 +88,11 @@  static void pool_free_work(struct work_struct *wrk)
 {
 	struct intel_gt_buffer_pool *pool =
 		container_of(wrk, typeof(*pool), work.work);
+	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
 
 	if (pool_free_older_than(pool, HZ))
-		schedule_delayed_work(&pool->work,
-				      round_jiffies_up_relative(HZ));
+		queue_delayed_work(gt->i915->unordered_wq, &pool->work,
+				   round_jiffies_up_relative(HZ));
 }
 
 static void pool_retire(struct i915_active *ref)
@@ -99,6 +100,7 @@  static void pool_retire(struct i915_active *ref)
 	struct intel_gt_buffer_pool_node *node =
 		container_of(ref, typeof(*node), active);
 	struct intel_gt_buffer_pool *pool = node->pool;
+	struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
 	struct list_head *list = bucket_for_size(pool, node->obj->base.size);
 	unsigned long flags;
 
@@ -116,8 +118,8 @@  static void pool_retire(struct i915_active *ref)
 	WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
 	spin_unlock_irqrestore(&pool->lock, flags);
 
-	schedule_delayed_work(&pool->work,
-			      round_jiffies_up_relative(HZ));
+	queue_delayed_work(gt->i915->unordered_wq, &pool->work,
+			   round_jiffies_up_relative(HZ));
 }
 
 void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 8f888d36f16d..62fd00c9e519 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -376,7 +376,7 @@  static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
 	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
 		gt->i915->l3_parity.which_slice |= 1 << 0;
 
-	schedule_work(&gt->i915->l3_parity.error_work);
+	queue_work(gt->i915->unordered_wq, &gt->i915->l3_parity.error_work);
 }
 
 void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 1dfd01668c79..d1a382dfaa1d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -116,7 +116,7 @@  void intel_engine_add_retire(struct intel_engine_cs *engine,
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 
 	if (add_retire(engine, tl))
-		schedule_work(&engine->retire_work);
+		queue_work(engine->i915->unordered_wq, &engine->retire_work);
 }
 
 void intel_engine_init_retire(struct intel_engine_cs *engine)
@@ -207,8 +207,8 @@  static void retire_work_handler(struct work_struct *work)
 	struct intel_gt *gt =
 		container_of(work, typeof(*gt), requests.retire_work.work);
 
-	schedule_delayed_work(&gt->requests.retire_work,
-			      round_jiffies_up_relative(HZ));
+	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
+			   round_jiffies_up_relative(HZ));
 	intel_gt_retire_requests(gt);
 }
 
@@ -224,8 +224,8 @@  void intel_gt_park_requests(struct intel_gt *gt)
 
 void intel_gt_unpark_requests(struct intel_gt *gt)
 {
-	schedule_delayed_work(&gt->requests.retire_work,
-			      round_jiffies_up_relative(HZ));
+	queue_delayed_work(gt->i915->unordered_wq, &gt->requests.retire_work,
+			   round_jiffies_up_relative(HZ));
 }
 
 void intel_gt_fini_requests(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 195ff72d7a14..e2152f75ba2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1625,7 +1625,7 @@  void __intel_init_wedge(struct intel_wedge_me *w,
 	w->name = name;
 
 	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
-	schedule_delayed_work(&w->work, timeout);
+	queue_delayed_work(gt->i915->unordered_wq, &w->work, timeout);
 }
 
 void __intel_fini_wedge(struct intel_wedge_me *w)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index e68a99205599..e92e626d4994 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -73,13 +73,14 @@  static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
 static void rps_timer(struct timer_list *t)
 {
 	struct intel_rps *rps = from_timer(rps, t, timer);
+	struct intel_gt *gt = rps_to_gt(rps);
 	struct intel_engine_cs *engine;
 	ktime_t dt, last, timestamp;
 	enum intel_engine_id id;
 	s64 max_busy[3] = {};
 
 	timestamp = 0;
-	for_each_engine(engine, rps_to_gt(rps), id) {
+	for_each_engine(engine, gt, id) {
 		s64 busy;
 		int i;
 
@@ -123,7 +124,7 @@  static void rps_timer(struct timer_list *t)
 
 			busy += div_u64(max_busy[i], 1 << i);
 		}
-		GT_TRACE(rps_to_gt(rps),
+		GT_TRACE(gt,
 			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
 			 busy, (int)div64_u64(100 * busy, dt),
 			 max_busy[0], max_busy[1], max_busy[2],
@@ -133,12 +134,12 @@  static void rps_timer(struct timer_list *t)
 		    rps->cur_freq < rps->max_freq_softlimit) {
 			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
 			rps->pm_interval = 1;
-			schedule_work(&rps->work);
+			queue_work(gt->i915->unordered_wq, &rps->work);
 		} else if (100 * busy < rps->power.down_threshold * dt &&
 			   rps->cur_freq > rps->min_freq_softlimit) {
 			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
 			rps->pm_interval = 1;
-			schedule_work(&rps->work);
+			queue_work(gt->i915->unordered_wq, &rps->work);
 		} else {
 			rps->last_adj = 0;
 		}
@@ -973,7 +974,7 @@  static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
 	}
 	mutex_unlock(&rps->lock);
 	if (boost)
-		schedule_work(&rps->work);
+		queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
 
 	return 0;
 }
@@ -1025,7 +1026,8 @@  void intel_rps_boost(struct i915_request *rq)
 			if (!atomic_fetch_inc(&slpc->num_waiters)) {
 				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
 					 rq->fence.context, rq->fence.seqno);
-				schedule_work(&slpc->boost_work);
+				queue_work(rps_to_gt(rps)->i915->unordered_wq,
+					   &slpc->boost_work);
 			}
 
 			return;
@@ -1041,7 +1043,7 @@  void intel_rps_boost(struct i915_request *rq)
 			 rq->fence.context, rq->fence.seqno);
 
 		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
-			schedule_work(&rps->work);
+			queue_work(rps_to_gt(rps)->i915->unordered_wq, &rps->work);
 
 		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 	}
@@ -1900,7 +1902,7 @@  void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 	gen6_gt_pm_mask_irq(gt, events);
 
 	rps->pm_iir |= events;
-	schedule_work(&rps->work);
+	queue_work(gt->i915->unordered_wq, &rps->work);
 }
 
 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
@@ -1917,7 +1919,7 @@  void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
 		gen6_gt_pm_mask_irq(gt, events);
 		rps->pm_iir |= events;
 
-		schedule_work(&rps->work);
+		queue_work(gt->i915->unordered_wq, &rps->work);
 		spin_unlock(gt->irq_lock);
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 542ce6d2de19..78cdfc6f315f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -27,7 +27,7 @@  static void perf_begin(struct intel_gt *gt)
 
 	/* Boost gpufreq to max [waitboost] and keep it fixed */
 	atomic_inc(&gt->rps.num_waiters);
-	schedule_work(&gt->rps.work);
+	queue_work(gt->i915->unordered_wq, &gt->rps.work);
 	flush_work(&gt->rps.work);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 522733a89946..88808aa85b26 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -132,8 +132,18 @@  static int i915_workqueues_init(struct drm_i915_private *dev_priv)
 	if (dev_priv->display.hotplug.dp_wq == NULL)
 		goto out_free_wq;
 
+	/*
+	 * The unordered i915 workqueue should be used for all work
+	 * scheduling that do not require running in order.
+	 */
+	dev_priv->unordered_wq = alloc_workqueue("i915-unordered", 0, 0);
+	if (dev_priv->unordered_wq == NULL)
+		goto out_free_dp_wq;
+
 	return 0;
 
+out_free_dp_wq:
+	destroy_workqueue(dev_priv->unordered_wq);
 out_free_wq:
 	destroy_workqueue(dev_priv->wq);
 out_err:
@@ -144,6 +154,7 @@  static int i915_workqueues_init(struct drm_i915_private *dev_priv)
 
 static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
 {
+	destroy_workqueue(dev_priv->unordered_wq);
 	destroy_workqueue(dev_priv->display.hotplug.dp_wq);
 	destroy_workqueue(dev_priv->wq);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 14c5338c96a6..8f2665e8afb5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -259,6 +259,14 @@  struct drm_i915_private {
 	 */
 	struct workqueue_struct *wq;
 
+	/**
+	 * unordered_wq - internal workqueue for unordered work
+	 *
+	 * This workqueue should be used for all unordered work
+	 * scheduling within i915.
+	 */
+	struct workqueue_struct *unordered_wq;
+
 	/* pm private clock gating functions */
 	const struct drm_i915_clock_gating_funcs *clock_gating_funcs;
 
@@ -930,5 +938,4 @@  IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
 				       GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
-
 #endif
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 630a732aaecc..894068bb37b6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -290,7 +290,7 @@  static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
 
 	if (!i915_request_completed(rq)) {
 		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
-			schedule_work(&gt->watchdog.work);
+			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
 	} else {
 		i915_request_put(rq);
 	}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 40aafe676017..497ea21a347e 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -75,7 +75,7 @@  void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
 
 	/* Assume we are not in process context and so cannot sleep. */
 	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
-		mod_delayed_work(system_wq, &wf->work,
+		mod_delayed_work(wf->i915->wq, &wf->work,
 				 FIELD_GET(INTEL_WAKEREF_PUT_DELAY, flags));
 		return;
 	}