diff mbox series

[RFC,v3,14/14] drm/sched: Scale deadlines depending on queue depth

Message ID 20250331201705.60663-15-tvrtko.ursulin@igalia.com (mailing list archive)
State New
Headers show
Series Deadline DRM scheduler | expand

Commit Message

Tvrtko Ursulin March 31, 2025, 8:17 p.m. UTC
So far deadline based scheduling was able to remove the need for separate
run queues (per priority) and alleviate the starvation issues hampering
FIFO when somewhat reasonable clients are concerned.

Because the deadline implementation is, however, still based on the
submission time as its baseline criterion — the current DRM scheduler
design makes it difficult to consider a job (or entity) "runnable"
timestamp as an alternative — it shares the same weakness as FIFO with
clients which rapidly submit deep job queues. In those cases the deadline
scheduler will be similarly unfair as FIFO is.

One simple approach to somewhat alleviate that and apply some fairness is
to scale the relative deadlines by client queue depth.

Apart from queue depth, scaling is based on client priority, where kernel
submissions are aggressively pulled in, while userspace priority levels are
pushed out proportionately to the decrease in priority.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <phasta@kernel.org>
Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c   | 39 ++++++++++++----------
 drivers/gpu/drm/scheduler/sched_internal.h |  4 ---
 drivers/gpu/drm/scheduler/sched_rq.c       |  4 +--
 include/drm/gpu_scheduler.h                |  6 ++--
 4 files changed, 25 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index c6ed0d1642f3..98be867dcf41 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -387,21 +387,25 @@  static ktime_t
 __drm_sched_entity_get_job_deadline(struct drm_sched_entity *entity,
 				    ktime_t submit_ts)
 {
-	static const unsigned int d_us[] = {
-		[DRM_SCHED_PRIORITY_KERNEL] =    100,
-		[DRM_SCHED_PRIORITY_HIGH]   =   1000,
-		[DRM_SCHED_PRIORITY_NORMAL] =   5000,
-		[DRM_SCHED_PRIORITY_LOW]    = 100000,
+	static const long d_us[] = {
+		[DRM_SCHED_PRIORITY_KERNEL] = -1000,
+		[DRM_SCHED_PRIORITY_HIGH]   =  1000,
+		[DRM_SCHED_PRIORITY_NORMAL] =  2500,
+		[DRM_SCHED_PRIORITY_LOW]    = 10000,
 	};
+	static const unsigned int shift[] = {
+		[DRM_SCHED_PRIORITY_KERNEL] = 4,
+		[DRM_SCHED_PRIORITY_HIGH]   = 0,
+		[DRM_SCHED_PRIORITY_NORMAL] = 1,
+		[DRM_SCHED_PRIORITY_LOW]    = 2,
+	};
+	const unsigned int prio = entity->priority;
+	long d;
 
-	return ktime_add_us(submit_ts, d_us[entity->priority]);
-}
+	d = d_us[prio] *
+	    ((spsc_queue_count(&entity->job_queue) + 1) << shift[prio]);
 
-ktime_t
-drm_sched_entity_get_job_deadline(struct drm_sched_entity *entity,
-				  struct drm_sched_job *job)
-{
-	return __drm_sched_entity_get_job_deadline(entity, job->submit_ts);
+	return ktime_add_us(submit_ts, d);
 }
 
 /*
@@ -575,7 +579,7 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 	struct drm_sched_entity *entity = sched_job->entity;
 	struct drm_gpu_scheduler *sched =
 		container_of(entity->rq, typeof(*sched), rq);
-	ktime_t submit_ts;
+	ktime_t deadline_ts;
 	bool first;
 
 	trace_drm_sched_job(sched_job, entity);
@@ -585,16 +589,15 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 	/*
 	 * After the sched_job is pushed into the entity queue, it may be
 	 * completed and freed up at any time. We can no longer access it.
-	 * Make sure to set the submit_ts first, to avoid a race.
+	 * Make sure to set the deadline_ts first, to avoid a race.
 	 */
-	sched_job->submit_ts = submit_ts = ktime_get();
+	sched_job->deadline_ts = deadline_ts =
+		__drm_sched_entity_get_job_deadline(entity, ktime_get());
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
 	if (first) {
-		submit_ts = __drm_sched_entity_get_job_deadline(entity,
-								submit_ts);
-		sched = drm_sched_rq_add_entity(entity, submit_ts);
+		sched = drm_sched_rq_add_entity(entity, deadline_ts);
 		if (sched)
 			drm_sched_wakeup(sched);
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h
index f50e54bfaccc..3d6e853e87b6 100644
--- a/drivers/gpu/drm/scheduler/sched_internal.h
+++ b/drivers/gpu/drm/scheduler/sched_internal.h
@@ -28,10 +28,6 @@  void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
 			       struct dma_fence *parent);
 void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
 
-
-ktime_t drm_sched_entity_get_job_deadline(struct drm_sched_entity *entity,
-					  struct drm_sched_job *job);
-
 /**
  * drm_sched_entity_queue_pop - Low level helper for popping queued jobs
  *
diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c
index 4b142a4c89d1..ffec9691d5a7 100644
--- a/drivers/gpu/drm/scheduler/sched_rq.c
+++ b/drivers/gpu/drm/scheduler/sched_rq.c
@@ -138,7 +138,6 @@  void drm_sched_rq_pop_entity(struct drm_sched_entity *entity)
 {
 	struct drm_sched_job *next_job;
 	struct drm_sched_rq *rq;
-	ktime_t ts;
 
 	/*
 	 * Update the entity's location in the min heap according to
@@ -148,11 +147,10 @@  void drm_sched_rq_pop_entity(struct drm_sched_entity *entity)
 	if (!next_job)
 		return;
 
-	ts = drm_sched_entity_get_job_deadline(entity, next_job);
 	spin_lock(&entity->lock);
 	rq = entity->rq;
 	spin_lock(&rq->lock);
-	drm_sched_rq_update_tree_locked(entity, rq, ts);
+	drm_sched_rq_update_tree_locked(entity, rq, next_job->deadline_ts);
 	spin_unlock(&rq->lock);
 	spin_unlock(&entity->lock);
 }
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 1e1dd16a0d9a..e0c3d84dd8b1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -331,11 +331,11 @@  struct drm_sched_job {
 	u64				id;
 
 	/**
-	 * @submit_ts:
+	 * @deadline_ts:
 	 *
-	 * When the job was pushed into the entity queue.
+	 * Job deadline set at push time.
 	 */
-	ktime_t                         submit_ts;
+	ktime_t                         deadline_ts;
 
 	/**
 	 * @sched: