diff mbox series

[v4,02/10] drm/sched: Convert drm scheduler to use a work queue rather than kthread

Message ID 20230919050155.2647172-3-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series DRM scheduler changes for Xe | expand

Commit Message

Matthew Brost Sept. 19, 2023, 5:01 a.m. UTC
In XE, the new Intel GPU driver, a choice has been made to have a 1 to 1
mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
seems a bit odd but let us explain the reasoning below.

1. In XE the submission order from multiple drm_sched_entity is not
guaranteed to be the same as the completion order even if targeting the
same hardware engine. This is because in XE we have a firmware scheduler,
the GuC, which is allowed to reorder, timeslice, and preempt submissions.
If using a shared drm_gpu_scheduler across multiple drm_sched_entity, the
TDR falls apart as the TDR expects submission order == completion order.
Using a dedicated drm_gpu_scheduler per drm_sched_entity solves this
problem.

2. In XE submissions are done via programming a ring buffer (circular
buffer); a drm_gpu_scheduler provides a limit on the number of jobs. If
the limit on the number of jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we
get flow control on the ring for free.

A problem with this design is that currently a drm_gpu_scheduler uses a
kthread for submission / job cleanup. This doesn't scale if a large
number of drm_gpu_scheduler are used. To work around the scaling issue,
use a worker rather than a kthread for submission / job cleanup.

v2:
  - (Rob Clark) Fix msm build
  - Pass in run work queue
v3:
  - (Boris) don't have loop in worker
v4:
  - (Tvrtko) break out submit ready, stop, start helpers into own patch
v5:
  - (Boris) default to ordered work queue

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
 drivers/gpu/drm/lima/lima_sched.c          |   2 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
 drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
 drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
 drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
 include/drm/gpu_scheduler.h                |  14 ++-
 9 files changed, 79 insertions(+), 75 deletions(-)

Comments

Luben Tuikov Sept. 27, 2023, 3:32 a.m. UTC | #1
Hi,

On 2023-09-19 01:01, Matthew Brost wrote:
> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
> seems a bit odd but let us explain the reasoning below.
> 
> 1. In XE the submission order from multiple drm_sched_entity is not
> guaranteed to be the same completion even if targeting the same hardware
> engine. This is because in XE we have a firmware scheduler, the GuC,
> which allowed to reorder, timeslice, and preempt submissions. If a using
> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
> apart as the TDR expects submission order == completion order. Using a
> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
> 
> 2. In XE submissions are done via programming a ring buffer (circular
> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
> control on the ring for free.
> 
> A problem with this design is currently a drm_gpu_scheduler uses a
> kthread for submission / job cleanup. This doesn't scale if a large
> number of drm_gpu_scheduler are used. To work around the scaling issue,
> use a worker rather than kthread for submission / job cleanup.
> 
> v2:
>   - (Rob Clark) Fix msm build
>   - Pass in run work queue
> v3:
>   - (Boris) don't have loop in worker
> v4:
>   - (Tvrtko) break out submit ready, stop, start helpers into own patch
> v5:
>   - (Boris) default to ordered work queue
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>  drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>  drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>  drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>  drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>  drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>  drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>  include/drm/gpu_scheduler.h                |  14 ++-
>  9 files changed, 79 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e366f61c3aed..16f3cfe1574a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>  			break;
>  		}
>  
> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>  				   ring->num_hw_submission, 0,
>  				   timeout, adev->reset_domain->wq,
>  				   ring->sched_score, ring->name,
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index 345fec6cb1a4..618a804ddc34 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>  {
>  	int ret;
>  
> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>  			     msecs_to_jiffies(500), NULL, NULL,
>  			     dev_name(gpu->dev), gpu->dev);
> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> index ffd91a5ee299..8d858aed0e56 100644
> --- a/drivers/gpu/drm/lima/lima_sched.c
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>  
>  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>  
> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>  			      lima_job_hang_limit,
>  			      msecs_to_jiffies(timeout), NULL,
>  			      NULL, name, pipe->ldev->dev);
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> index 40c0bc35a44c..b8865e61b40f 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>  	 /* currently managing hangcheck ourselves: */
>  	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>  
> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>  			num_hw_submissions, 0, sched_timeout,
>  			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);

checkpatch.pl complains here about unmatched open parens.

>  	if (ret) {
> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> index 88217185e0f3..d458c2227d4f 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>  	if (!drm->sched_wq)
>  		return -ENOMEM;
>  
> -	return drm_sched_init(sched, &nouveau_sched_ops,
> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>  			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>  			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>  }
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 033f5e684707..326ca1ddf1d7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>  		js->queue[j].fence_context = dma_fence_context_alloc(1);
>  
>  		ret = drm_sched_init(&js->queue[j].sched,
> -				     &panfrost_sched_ops,
> +				     &panfrost_sched_ops, NULL,
>  				     nentries, 0,
>  				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>  				     pfdev->reset.wq,
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index e4fa62abca41..ee6281942e36 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -48,7 +48,6 @@
>   * through the jobs entity pointer.
>   */
>  
> -#include <linux/kthread.h>
>  #include <linux/wait.h>
>  #include <linux/sched.h>
>  #include <linux/completion.h>
> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>  	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>  }
>  
> +/**
> + * drm_sched_submit_queue - scheduler queue submission

There is no verb in the description, and it is not clear what
this function does unless one reads the code. Given that this
is DOC, this should be clearer here. Something like "queue
scheduler work to be executed" or something to that effect.

Coming back to this from reading the patch below, it was somewhat
unclear what "drm_sched_submit_queue()" does, since when reading
below, "submit" was being read by my mind as an adjective, as opposed
to a verb. Perhaps something like:

drm_sched_queue_submit(), or
drm_sched_queue_exec(), or
drm_sched_queue_push(), or something to that effect. You pick. :-)

Note that it doesn't have to be 100% reflective of the fact that
we're putting this on a workqueue and it would be executed sooner
or later, so long as it conveys the fact that we're executing this
scheduler queue.

> + * @sched: scheduler instance
> + */
> +static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched)
> +{
> +	if (!READ_ONCE(sched->pause_submit))
> +		queue_work(sched->submit_wq, &sched->work_submit);
> +}
> +
>  /**
>   * drm_sched_job_done - complete a job
>   * @s_job: pointer to the job which is done
> @@ -275,7 +284,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
>  	dma_fence_get(&s_fence->finished);
>  	drm_sched_fence_finished(s_fence, result);
>  	dma_fence_put(&s_fence->finished);
> -	wake_up_interruptible(&sched->wake_up_worker);
> +	drm_sched_submit_queue(sched);
>  }
>  
>  /**
> @@ -868,7 +877,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>  void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
>  {
>  	if (drm_sched_can_queue(sched))
> -		wake_up_interruptible(&sched->wake_up_worker);
> +		drm_sched_submit_queue(sched);
>  }
>  
>  /**
> @@ -978,61 +987,42 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
>  }
>  EXPORT_SYMBOL(drm_sched_pick_best);
>  
> -/**
> - * drm_sched_blocked - check if the scheduler is blocked
> - *
> - * @sched: scheduler instance
> - *
> - * Returns true if blocked, otherwise false.
> - */
> -static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
> -{
> -	if (kthread_should_park()) {
> -		kthread_parkme();
> -		return true;
> -	}
> -
> -	return false;
> -}
> -
>  /**
>   * drm_sched_main - main scheduler thread
>   *
>   * @param: scheduler instance
> - *
> - * Returns 0.
>   */
> -static int drm_sched_main(void *param)
> +static void drm_sched_main(struct work_struct *w)
>  {
> -	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
> +	struct drm_gpu_scheduler *sched =
> +		container_of(w, struct drm_gpu_scheduler, work_submit);
> +	struct drm_sched_entity *entity;
> +	struct drm_sched_job *cleanup_job;
>  	int r;
>  
> -	sched_set_fifo_low(current);
> +	if (READ_ONCE(sched->pause_submit))
> +		return;
>  
> -	while (!kthread_should_stop()) {
> -		struct drm_sched_entity *entity = NULL;
> -		struct drm_sched_fence *s_fence;
> -		struct drm_sched_job *sched_job;
> -		struct dma_fence *fence;
> -		struct drm_sched_job *cleanup_job = NULL;
> +	cleanup_job = drm_sched_get_cleanup_job(sched);
> +	entity = drm_sched_select_entity(sched);
>  
> -		wait_event_interruptible(sched->wake_up_worker,
> -					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
> -					 (!drm_sched_blocked(sched) &&
> -					  (entity = drm_sched_select_entity(sched))) ||
> -					 kthread_should_stop());
> +	if (!entity && !cleanup_job)
> +		return;	/* No more work */
>  
> -		if (cleanup_job)
> -			sched->ops->free_job(cleanup_job);
> +	if (cleanup_job)
> +		sched->ops->free_job(cleanup_job);
>  
> -		if (!entity)
> -			continue;
> +	if (entity) {
> +		struct dma_fence *fence;
> +		struct drm_sched_fence *s_fence;
> +		struct drm_sched_job *sched_job;
>  
>  		sched_job = drm_sched_entity_pop_job(entity);
> -
>  		if (!sched_job) {
>  			complete_all(&entity->entity_idle);
> -			continue;
> +			if (!cleanup_job)
> +				return;	/* No more work */
> +			goto again;
>  		}
>  
>  		s_fence = sched_job->s_fence;
> @@ -1063,7 +1053,9 @@ static int drm_sched_main(void *param)
>  
>  		wake_up(&sched->job_scheduled);
>  	}
> -	return 0;
> +
> +again:
> +	drm_sched_submit_queue(sched);
>  }
>  
>  /**
> @@ -1071,6 +1063,8 @@ static int drm_sched_main(void *param)
>   *
>   * @sched: scheduler instance
>   * @ops: backend operations for this scheduler
> + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
> + *	       allocated and used
>   * @hw_submission: number of hw submissions that can be in flight
>   * @hang_limit: number of times to allow a job to hang before dropping it
>   * @timeout: timeout value in jiffies for the scheduler
> @@ -1084,14 +1078,25 @@ static int drm_sched_main(void *param)
>   */
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> +		   struct workqueue_struct *submit_wq,
>  		   unsigned hw_submission, unsigned hang_limit,
>  		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name, struct device *dev)
>  {
> -	int i, ret;
> +	int i;
>  	sched->ops = ops;
>  	sched->hw_submission_limit = hw_submission;
>  	sched->name = name;
> +	if (!submit_wq) {
> +		sched->submit_wq = alloc_ordered_workqueue(name, 0);
> +		if (!sched->submit_wq)
> +			return -ENOMEM;
> +
> +		sched->alloc_submit_wq = true;
> +	} else {
> +		sched->submit_wq = submit_wq;
> +		sched->alloc_submit_wq = false;
> +	}

This if-conditional, I would've written:

	if (submit_wq) {
		sched->submit_wq = submit_wq;
		sched->alloc_submit_wq = false;
	} else {
		sched->submit_wq = alloc_ordered_workqueue(name, 0);
		if (!sched->submit_wq)
			return -ENOMEM;

		sched->alloc_submit_wq = true;
	}

It's easier to understand testing for positivity, than negativity.


>  	sched->timeout = timeout;
>  	sched->timeout_wq = timeout_wq ? : system_wq;
>  	sched->hang_limit = hang_limit;
> @@ -1100,23 +1105,15 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
>  		drm_sched_rq_init(sched, &sched->sched_rq[i]);
>  
> -	init_waitqueue_head(&sched->wake_up_worker);
>  	init_waitqueue_head(&sched->job_scheduled);
>  	INIT_LIST_HEAD(&sched->pending_list);
>  	spin_lock_init(&sched->job_list_lock);
>  	atomic_set(&sched->hw_rq_count, 0);
>  	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> +	INIT_WORK(&sched->work_submit, drm_sched_main);
>  	atomic_set(&sched->_score, 0);
>  	atomic64_set(&sched->job_id_count, 0);
> -
> -	/* Each scheduler will run on a seperate kernel thread */
> -	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
> -	if (IS_ERR(sched->thread)) {
> -		ret = PTR_ERR(sched->thread);
> -		sched->thread = NULL;
> -		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
> -		return ret;
> -	}
> +	sched->pause_submit = false;
>  
>  	sched->ready = true;
>  	return 0;
> @@ -1135,8 +1132,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
>  	struct drm_sched_entity *s_entity;
>  	int i;
>  
> -	if (sched->thread)
> -		kthread_stop(sched->thread);
> +	drm_sched_submit_stop(sched);
>  
>  	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>  		struct drm_sched_rq *rq = &sched->sched_rq[i];
> @@ -1159,6 +1155,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
>  	/* Confirm no work left behind accessing device structures */
>  	cancel_delayed_work_sync(&sched->work_tdr);
>  
> +	if (sched->alloc_submit_wq)
> +		destroy_workqueue(sched->submit_wq);
>  	sched->ready = false;
>  }
>  EXPORT_SYMBOL(drm_sched_fini);
> @@ -1216,7 +1214,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
>   */
>  bool drm_sched_submit_ready(struct drm_gpu_scheduler *sched)
>  {
> -	return !!sched->thread;
> +	return sched->ready;
>  
>  }
>  EXPORT_SYMBOL(drm_sched_submit_ready);
> @@ -1228,7 +1226,8 @@ EXPORT_SYMBOL(drm_sched_submit_ready);
>   */
>  void drm_sched_submit_stop(struct drm_gpu_scheduler *sched)
>  {
> -	kthread_park(sched->thread);
> +	WRITE_ONCE(sched->pause_submit, true);
> +	cancel_work_sync(&sched->work_submit);
>  }
>  EXPORT_SYMBOL(drm_sched_submit_stop);
>  
> @@ -1239,6 +1238,7 @@ EXPORT_SYMBOL(drm_sched_submit_stop);
>   */
>  void drm_sched_submit_start(struct drm_gpu_scheduler *sched)
>  {
> -	kthread_unpark(sched->thread);
> +	WRITE_ONCE(sched->pause_submit, false);
> +	queue_work(sched->submit_wq, &sched->work_submit);
>  }
>  EXPORT_SYMBOL(drm_sched_submit_start);
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 06238e6d7f5c..38e092ea41e6 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	int ret;
>  
>  	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
> -			     &v3d_bin_sched_ops,
> +			     &v3d_bin_sched_ops, NULL,
>  			     hw_jobs_limit, job_hang_limit,
>  			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_bin", v3d->drm.dev);
> @@ -396,7 +396,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  		return ret;
>  
>  	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
> -			     &v3d_render_sched_ops,
> +			     &v3d_render_sched_ops, NULL,
>  			     hw_jobs_limit, job_hang_limit,
>  			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_render", v3d->drm.dev);
> @@ -404,7 +404,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  		goto fail;
>  
>  	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
> -			     &v3d_tfu_sched_ops,
> +			     &v3d_tfu_sched_ops, NULL,
>  			     hw_jobs_limit, job_hang_limit,
>  			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_tfu", v3d->drm.dev);
> @@ -413,7 +413,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  
>  	if (v3d_has_csd(v3d)) {
>  		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
> -				     &v3d_csd_sched_ops,
> +				     &v3d_csd_sched_ops, NULL,
>  				     hw_jobs_limit, job_hang_limit,
>  				     msecs_to_jiffies(hang_limit_ms), NULL,
>  				     NULL, "v3d_csd", v3d->drm.dev);
> @@ -421,7 +421,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  			goto fail;
>  
>  		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
> -				     &v3d_cache_clean_sched_ops,
> +				     &v3d_cache_clean_sched_ops, NULL,
>  				     hw_jobs_limit, job_hang_limit,
>  				     msecs_to_jiffies(hang_limit_ms), NULL,
>  				     NULL, "v3d_cache_clean", v3d->drm.dev);
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index f12c5aea5294..95927c52383c 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -473,17 +473,16 @@ struct drm_sched_backend_ops {
>   * @timeout: the time after which a job is removed from the scheduler.
>   * @name: name of the ring for which this scheduler is being used.
>   * @sched_rq: priority wise array of run queues.
> - * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
> - *                  is ready to be scheduled.
>   * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
>   *                 waits on this wait queue until all the scheduled jobs are
>   *                 finished.
>   * @hw_rq_count: the number of jobs currently in the hardware queue.
>   * @job_id_count: used to assign unique id to the each job.
> + * @submit_wq: workqueue used to queue @work_submit
>   * @timeout_wq: workqueue used to queue @work_tdr
> + * @work_submit: schedules jobs and cleans up entities
>   * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
>   *            timeout interval is over.
> - * @thread: the kthread on which the scheduler which run.
>   * @pending_list: the list of jobs which are currently in the job queue.
>   * @job_list_lock: lock to protect the pending_list.
>   * @hang_limit: once the hangs by a job crosses this limit then it is marked
> @@ -492,6 +491,8 @@ struct drm_sched_backend_ops {
>   * @_score: score used when the driver doesn't provide one
>   * @ready: marks if the underlying HW is ready to work
>   * @free_guilty: A hit to time out handler to free the guilty job.
> + * @pause_submit: pause queuing of @work_submit on @submit_wq
> + * @alloc_submit_wq: scheduler own allocation of @submit_wq
>   * @dev: system &struct device
>   *
>   * One scheduler is implemented for each hardware ring.
> @@ -502,13 +503,13 @@ struct drm_gpu_scheduler {
>  	long				timeout;
>  	const char			*name;
>  	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_COUNT];
> -	wait_queue_head_t		wake_up_worker;
>  	wait_queue_head_t		job_scheduled;
>  	atomic_t			hw_rq_count;
>  	atomic64_t			job_id_count;
> +	struct workqueue_struct		*submit_wq;
>  	struct workqueue_struct		*timeout_wq;
> +	struct work_struct		work_submit;
>  	struct delayed_work		work_tdr;
> -	struct task_struct		*thread;
>  	struct list_head		pending_list;
>  	spinlock_t			job_list_lock;
>  	int				hang_limit;
> @@ -516,11 +517,14 @@ struct drm_gpu_scheduler {
>  	atomic_t                        _score;
>  	bool				ready;
>  	bool				free_guilty;
> +	bool				pause_submit;
> +	bool				alloc_submit_wq;

Please rename it to what it actually describes:

alloc_submit_wq --> own_submit_wq

to mean "do we own the submit wq". Then the check becomes
the intuitive,
	if (sched->own_submit_wq)
		destroy_workqueue(sched->submit_wq);

>  	struct device			*dev;
>  };
>  
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> +		   struct workqueue_struct *submit_wq,
>  		   uint32_t hw_submission, unsigned hang_limit,
>  		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name, struct device *dev);

This is a good patch.
Matthew Brost Oct. 5, 2023, 3:33 a.m. UTC | #2
On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
> Hi,
> 
> On 2023-09-19 01:01, Matthew Brost wrote:
> > In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
> > mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
> > seems a bit odd but let us explain the reasoning below.
> > 
> > 1. In XE the submission order from multiple drm_sched_entity is not
> > guaranteed to be the same completion even if targeting the same hardware
> > engine. This is because in XE we have a firmware scheduler, the GuC,
> > which allowed to reorder, timeslice, and preempt submissions. If a using
> > shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
> > apart as the TDR expects submission order == completion order. Using a
> > dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
> > 
> > 2. In XE submissions are done via programming a ring buffer (circular
> > buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
> > limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
> > control on the ring for free.
> > 
> > A problem with this design is currently a drm_gpu_scheduler uses a
> > kthread for submission / job cleanup. This doesn't scale if a large
> > number of drm_gpu_scheduler are used. To work around the scaling issue,
> > use a worker rather than kthread for submission / job cleanup.
> > 
> > v2:
> >   - (Rob Clark) Fix msm build
> >   - Pass in run work queue
> > v3:
> >   - (Boris) don't have loop in worker
> > v4:
> >   - (Tvrtko) break out submit ready, stop, start helpers into own patch
> > v5:
> >   - (Boris) default to ordered work queue
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
> >  drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
> >  drivers/gpu/drm/lima/lima_sched.c          |   2 +-
> >  drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
> >  drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
> >  drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
> >  drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
> >  drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
> >  include/drm/gpu_scheduler.h                |  14 ++-
> >  9 files changed, 79 insertions(+), 75 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index e366f61c3aed..16f3cfe1574a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
> >  			break;
> >  		}
> >  
> > -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
> > +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
> >  				   ring->num_hw_submission, 0,
> >  				   timeout, adev->reset_domain->wq,
> >  				   ring->sched_score, ring->name,
> > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > index 345fec6cb1a4..618a804ddc34 100644
> > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
> >  {
> >  	int ret;
> >  
> > -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
> > +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
> >  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> >  			     msecs_to_jiffies(500), NULL, NULL,
> >  			     dev_name(gpu->dev), gpu->dev);
> > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> > index ffd91a5ee299..8d858aed0e56 100644
> > --- a/drivers/gpu/drm/lima/lima_sched.c
> > +++ b/drivers/gpu/drm/lima/lima_sched.c
> > @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> >  
> >  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
> >  
> > -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> > +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
> >  			      lima_job_hang_limit,
> >  			      msecs_to_jiffies(timeout), NULL,
> >  			      NULL, name, pipe->ldev->dev);
> > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > index 40c0bc35a44c..b8865e61b40f 100644
> > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
> >  	 /* currently managing hangcheck ourselves: */
> >  	sched_timeout = MAX_SCHEDULE_TIMEOUT;
> >  
> > -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
> > +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
> >  			num_hw_submissions, 0, sched_timeout,
> >  			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
> 
> checkpatch.pl complains here about unmatched open parens.
> 

Will fix and run checkpatch before posting next rev.

> >  	if (ret) {
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > index 88217185e0f3..d458c2227d4f 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
> >  	if (!drm->sched_wq)
> >  		return -ENOMEM;
> >  
> > -	return drm_sched_init(sched, &nouveau_sched_ops,
> > +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
> >  			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
> >  			      NULL, NULL, "nouveau_sched", drm->dev->dev);
> >  }
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 033f5e684707..326ca1ddf1d7 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
> >  		js->queue[j].fence_context = dma_fence_context_alloc(1);
> >  
> >  		ret = drm_sched_init(&js->queue[j].sched,
> > -				     &panfrost_sched_ops,
> > +				     &panfrost_sched_ops, NULL,
> >  				     nentries, 0,
> >  				     msecs_to_jiffies(JOB_TIMEOUT_MS),
> >  				     pfdev->reset.wq,
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > index e4fa62abca41..ee6281942e36 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -48,7 +48,6 @@
> >   * through the jobs entity pointer.
> >   */
> >  
> > -#include <linux/kthread.h>
> >  #include <linux/wait.h>
> >  #include <linux/sched.h>
> >  #include <linux/completion.h>
> > @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> >  	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
> >  }
> >  
> > +/**
> > + * drm_sched_submit_queue - scheduler queue submission
> 
> There is no verb in the description, and is not clear what
> this function does unless one reads the code. Given that this
> is DOC, this should be clearer here. Something like "queue
> scheduler work to be executed" or something to that effect.
>

Will fix.
 
> Coming back to this from reading the patch below, it was somewhat
> unclear what "drm_sched_submit_queue()" does, since when reading
> below, "submit" was being read by my mind as an adjective, as opposed
> to a verb. Perhaps something like:
> 
> drm_sched_queue_submit(), or
> drm_sched_queue_exec(), or
> drm_sched_queue_push(), or something to that effect. You pick. :-)
>

I prefer the name as is. In this patch we have:

drm_sched_submit_queue()
drm_sched_submit_start()
drm_sched_submit_stop()
drm_sched_submit_ready()

I like all these functions start with 'drm_sched_submit' which allows
for easy searching for the functions that touch the DRM scheduler
submission state.

With a little better doc, are you fine with the names as is?

> Note that it doesn't have to be 100% reflective of the fact that
> we're putting this on a workqueue and it would be executed sooner
> or later, so long as it conveys the fact that we're executing this
> scheduler queue.
> 
> > + * @sched: scheduler instance
> > + */
> > +static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched)
> > +{
> > +	if (!READ_ONCE(sched->pause_submit))
> > +		queue_work(sched->submit_wq, &sched->work_submit);
> > +}
> > +
> >  /**
> >   * drm_sched_job_done - complete a job
> >   * @s_job: pointer to the job which is done
> > @@ -275,7 +284,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
> >  	dma_fence_get(&s_fence->finished);
> >  	drm_sched_fence_finished(s_fence, result);
> >  	dma_fence_put(&s_fence->finished);
> > -	wake_up_interruptible(&sched->wake_up_worker);
> > +	drm_sched_submit_queue(sched);
> >  }
> >  
> >  /**
> > @@ -868,7 +877,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
> >  void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
> >  {
> >  	if (drm_sched_can_queue(sched))
> > -		wake_up_interruptible(&sched->wake_up_worker);
> > +		drm_sched_submit_queue(sched);
> >  }
> >  
> >  /**
> > @@ -978,61 +987,42 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
> >  }
> >  EXPORT_SYMBOL(drm_sched_pick_best);
> >  
> > -/**
> > - * drm_sched_blocked - check if the scheduler is blocked
> > - *
> > - * @sched: scheduler instance
> > - *
> > - * Returns true if blocked, otherwise false.
> > - */
> > -static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
> > -{
> > -	if (kthread_should_park()) {
> > -		kthread_parkme();
> > -		return true;
> > -	}
> > -
> > -	return false;
> > -}
> > -
> >  /**
> >   * drm_sched_main - main scheduler thread
> >   *
> >   * @param: scheduler instance
> > - *
> > - * Returns 0.
> >   */
> > -static int drm_sched_main(void *param)
> > +static void drm_sched_main(struct work_struct *w)
> >  {
> > -	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
> > +	struct drm_gpu_scheduler *sched =
> > +		container_of(w, struct drm_gpu_scheduler, work_submit);
> > +	struct drm_sched_entity *entity;
> > +	struct drm_sched_job *cleanup_job;
> >  	int r;
> >  
> > -	sched_set_fifo_low(current);
> > +	if (READ_ONCE(sched->pause_submit))
> > +		return;
> >  
> > -	while (!kthread_should_stop()) {
> > -		struct drm_sched_entity *entity = NULL;
> > -		struct drm_sched_fence *s_fence;
> > -		struct drm_sched_job *sched_job;
> > -		struct dma_fence *fence;
> > -		struct drm_sched_job *cleanup_job = NULL;
> > +	cleanup_job = drm_sched_get_cleanup_job(sched);
> > +	entity = drm_sched_select_entity(sched);
> >  
> > -		wait_event_interruptible(sched->wake_up_worker,
> > -					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
> > -					 (!drm_sched_blocked(sched) &&
> > -					  (entity = drm_sched_select_entity(sched))) ||
> > -					 kthread_should_stop());
> > +	if (!entity && !cleanup_job)
> > +		return;	/* No more work */
> >  
> > -		if (cleanup_job)
> > -			sched->ops->free_job(cleanup_job);
> > +	if (cleanup_job)
> > +		sched->ops->free_job(cleanup_job);
> >  
> > -		if (!entity)
> > -			continue;
> > +	if (entity) {
> > +		struct dma_fence *fence;
> > +		struct drm_sched_fence *s_fence;
> > +		struct drm_sched_job *sched_job;
> >  
> >  		sched_job = drm_sched_entity_pop_job(entity);
> > -
> >  		if (!sched_job) {
> >  			complete_all(&entity->entity_idle);
> > -			continue;
> > +			if (!cleanup_job)
> > +				return;	/* No more work */
> > +			goto again;
> >  		}
> >  
> >  		s_fence = sched_job->s_fence;
> > @@ -1063,7 +1053,9 @@ static int drm_sched_main(void *param)
> >  
> >  		wake_up(&sched->job_scheduled);
> >  	}
> > -	return 0;
> > +
> > +again:
> > +	drm_sched_submit_queue(sched);
> >  }
> >  
> >  /**
> > @@ -1071,6 +1063,8 @@ static int drm_sched_main(void *param)
> >   *
> >   * @sched: scheduler instance
> >   * @ops: backend operations for this scheduler
> > + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
> > + *	       allocated and used
> >   * @hw_submission: number of hw submissions that can be in flight
> >   * @hang_limit: number of times to allow a job to hang before dropping it
> >   * @timeout: timeout value in jiffies for the scheduler
> > @@ -1084,14 +1078,25 @@ static int drm_sched_main(void *param)
> >   */
> >  int drm_sched_init(struct drm_gpu_scheduler *sched,
> >  		   const struct drm_sched_backend_ops *ops,
> > +		   struct workqueue_struct *submit_wq,
> >  		   unsigned hw_submission, unsigned hang_limit,
> >  		   long timeout, struct workqueue_struct *timeout_wq,
> >  		   atomic_t *score, const char *name, struct device *dev)
> >  {
> > -	int i, ret;
> > +	int i;
> >  	sched->ops = ops;
> >  	sched->hw_submission_limit = hw_submission;
> >  	sched->name = name;
> > +	if (!submit_wq) {
> > +		sched->submit_wq = alloc_ordered_workqueue(name, 0);
> > +		if (!sched->submit_wq)
> > +			return -ENOMEM;
> > +
> > +		sched->alloc_submit_wq = true;
> > +	} else {
> > +		sched->submit_wq = submit_wq;
> > +		sched->alloc_submit_wq = false;
> > +	}
> 
> This if-conditional, I would've written:
> 
> 	if (submit_wq) {
> 		sched->submit_wq = submit_wq;
> 		sched->alloc_submit_wq = false;
> 	} else {
> 		sched->submit_wq = alloc_ordered_workqueue(name, 0);
> 		if (!sched->submit_wq)
> 			return -ENOMEM;
> 
> 		sched->alloc_submit_wq = true;
> 	}
> 
> It's easier to understand testing for positivity, than negativity.
>

+1, will do this all in future patches.
 
> 
> >  	sched->timeout = timeout;
> >  	sched->timeout_wq = timeout_wq ? : system_wq;
> >  	sched->hang_limit = hang_limit;
> > @@ -1100,23 +1105,15 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >  	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
> >  		drm_sched_rq_init(sched, &sched->sched_rq[i]);
> >  
> > -	init_waitqueue_head(&sched->wake_up_worker);
> >  	init_waitqueue_head(&sched->job_scheduled);
> >  	INIT_LIST_HEAD(&sched->pending_list);
> >  	spin_lock_init(&sched->job_list_lock);
> >  	atomic_set(&sched->hw_rq_count, 0);
> >  	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> > +	INIT_WORK(&sched->work_submit, drm_sched_main);
> >  	atomic_set(&sched->_score, 0);
> >  	atomic64_set(&sched->job_id_count, 0);
> > -
> > -	/* Each scheduler will run on a seperate kernel thread */
> > -	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
> > -	if (IS_ERR(sched->thread)) {
> > -		ret = PTR_ERR(sched->thread);
> > -		sched->thread = NULL;
> > -		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
> > -		return ret;
> > -	}
> > +	sched->pause_submit = false;
> >  
> >  	sched->ready = true;
> >  	return 0;
> > @@ -1135,8 +1132,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
> >  	struct drm_sched_entity *s_entity;
> >  	int i;
> >  
> > -	if (sched->thread)
> > -		kthread_stop(sched->thread);
> > +	drm_sched_submit_stop(sched);
> >  
> >  	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
> >  		struct drm_sched_rq *rq = &sched->sched_rq[i];
> > @@ -1159,6 +1155,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
> >  	/* Confirm no work left behind accessing device structures */
> >  	cancel_delayed_work_sync(&sched->work_tdr);
> >  
> > +	if (sched->alloc_submit_wq)
> > +		destroy_workqueue(sched->submit_wq);
> >  	sched->ready = false;
> >  }
> >  EXPORT_SYMBOL(drm_sched_fini);
> > @@ -1216,7 +1214,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
> >   */
> >  bool drm_sched_submit_ready(struct drm_gpu_scheduler *sched)
> >  {
> > -	return !!sched->thread;
> > +	return sched->ready;
> >  
> >  }
> >  EXPORT_SYMBOL(drm_sched_submit_ready);
> > @@ -1228,7 +1226,8 @@ EXPORT_SYMBOL(drm_sched_submit_ready);
> >   */
> >  void drm_sched_submit_stop(struct drm_gpu_scheduler *sched)
> >  {
> > -	kthread_park(sched->thread);
> > +	WRITE_ONCE(sched->pause_submit, true);
> > +	cancel_work_sync(&sched->work_submit);
> >  }
> >  EXPORT_SYMBOL(drm_sched_submit_stop);
> >  
> > @@ -1239,6 +1238,7 @@ EXPORT_SYMBOL(drm_sched_submit_stop);
> >   */
> >  void drm_sched_submit_start(struct drm_gpu_scheduler *sched)
> >  {
> > -	kthread_unpark(sched->thread);
> > +	WRITE_ONCE(sched->pause_submit, false);
> > +	queue_work(sched->submit_wq, &sched->work_submit);
> >  }
> >  EXPORT_SYMBOL(drm_sched_submit_start);
> > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> > index 06238e6d7f5c..38e092ea41e6 100644
> > --- a/drivers/gpu/drm/v3d/v3d_sched.c
> > +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> > @@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d)
> >  	int ret;
> >  
> >  	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
> > -			     &v3d_bin_sched_ops,
> > +			     &v3d_bin_sched_ops, NULL,
> >  			     hw_jobs_limit, job_hang_limit,
> >  			     msecs_to_jiffies(hang_limit_ms), NULL,
> >  			     NULL, "v3d_bin", v3d->drm.dev);
> > @@ -396,7 +396,7 @@ v3d_sched_init(struct v3d_dev *v3d)
> >  		return ret;
> >  
> >  	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
> > -			     &v3d_render_sched_ops,
> > +			     &v3d_render_sched_ops, NULL,
> >  			     hw_jobs_limit, job_hang_limit,
> >  			     msecs_to_jiffies(hang_limit_ms), NULL,
> >  			     NULL, "v3d_render", v3d->drm.dev);
> > @@ -404,7 +404,7 @@ v3d_sched_init(struct v3d_dev *v3d)
> >  		goto fail;
> >  
> >  	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
> > -			     &v3d_tfu_sched_ops,
> > +			     &v3d_tfu_sched_ops, NULL,
> >  			     hw_jobs_limit, job_hang_limit,
> >  			     msecs_to_jiffies(hang_limit_ms), NULL,
> >  			     NULL, "v3d_tfu", v3d->drm.dev);
> > @@ -413,7 +413,7 @@ v3d_sched_init(struct v3d_dev *v3d)
> >  
> >  	if (v3d_has_csd(v3d)) {
> >  		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
> > -				     &v3d_csd_sched_ops,
> > +				     &v3d_csd_sched_ops, NULL,
> >  				     hw_jobs_limit, job_hang_limit,
> >  				     msecs_to_jiffies(hang_limit_ms), NULL,
> >  				     NULL, "v3d_csd", v3d->drm.dev);
> > @@ -421,7 +421,7 @@ v3d_sched_init(struct v3d_dev *v3d)
> >  			goto fail;
> >  
> >  		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
> > -				     &v3d_cache_clean_sched_ops,
> > +				     &v3d_cache_clean_sched_ops, NULL,
> >  				     hw_jobs_limit, job_hang_limit,
> >  				     msecs_to_jiffies(hang_limit_ms), NULL,
> >  				     NULL, "v3d_cache_clean", v3d->drm.dev);
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index f12c5aea5294..95927c52383c 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -473,17 +473,16 @@ struct drm_sched_backend_ops {
> >   * @timeout: the time after which a job is removed from the scheduler.
> >   * @name: name of the ring for which this scheduler is being used.
> >   * @sched_rq: priority wise array of run queues.
> > - * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
> > - *                  is ready to be scheduled.
> >   * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
> >   *                 waits on this wait queue until all the scheduled jobs are
> >   *                 finished.
> >   * @hw_rq_count: the number of jobs currently in the hardware queue.
> >   * @job_id_count: used to assign unique id to the each job.
> > + * @submit_wq: workqueue used to queue @work_submit
> >   * @timeout_wq: workqueue used to queue @work_tdr
> > + * @work_submit: schedules jobs and cleans up entities
> >   * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> >   *            timeout interval is over.
> > - * @thread: the kthread on which the scheduler which run.
> >   * @pending_list: the list of jobs which are currently in the job queue.
> >   * @job_list_lock: lock to protect the pending_list.
> >   * @hang_limit: once the hangs by a job crosses this limit then it is marked
> > @@ -492,6 +491,8 @@ struct drm_sched_backend_ops {
> >   * @_score: score used when the driver doesn't provide one
> >   * @ready: marks if the underlying HW is ready to work
> >   * @free_guilty: A hit to time out handler to free the guilty job.
> > + * @pause_submit: pause queuing of @work_submit on @submit_wq
> > + * @alloc_submit_wq: scheduler own allocation of @submit_wq
> >   * @dev: system &struct device
> >   *
> >   * One scheduler is implemented for each hardware ring.
> > @@ -502,13 +503,13 @@ struct drm_gpu_scheduler {
> >  	long				timeout;
> >  	const char			*name;
> >  	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_COUNT];
> > -	wait_queue_head_t		wake_up_worker;
> >  	wait_queue_head_t		job_scheduled;
> >  	atomic_t			hw_rq_count;
> >  	atomic64_t			job_id_count;
> > +	struct workqueue_struct		*submit_wq;
> >  	struct workqueue_struct		*timeout_wq;
> > +	struct work_struct		work_submit;
> >  	struct delayed_work		work_tdr;
> > -	struct task_struct		*thread;
> >  	struct list_head		pending_list;
> >  	spinlock_t			job_list_lock;
> >  	int				hang_limit;
> > @@ -516,11 +517,14 @@ struct drm_gpu_scheduler {
> >  	atomic_t                        _score;
> >  	bool				ready;
> >  	bool				free_guilty;
> > +	bool				pause_submit;
> > +	bool				alloc_submit_wq;
> 
> Please rename it to what it actually describes:
> 
> alloc_submit_wq --> own_submit_wq
> 
> to mean "do we own the submit wq". Then the check becomes
> the intuitive,
> 	if (sched->own_submit_wq)
> 		destroy_workqueue(sched->submit_wq);
>

Got it, agree.
 
> >  	struct device			*dev;
> >  };
> >  
> >  int drm_sched_init(struct drm_gpu_scheduler *sched,
> >  		   const struct drm_sched_backend_ops *ops,
> > +		   struct workqueue_struct *submit_wq,
> >  		   uint32_t hw_submission, unsigned hang_limit,
> >  		   long timeout, struct workqueue_struct *timeout_wq,
> >  		   atomic_t *score, const char *name, struct device *dev);
> 
> This is a good patch.

Thanks for the review.

Matt

> -- 
> Regards,
> Luben
>
Luben Tuikov Oct. 5, 2023, 4:13 a.m. UTC | #3
On 2023-10-04 23:33, Matthew Brost wrote:
> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>> Hi,
>>
>> On 2023-09-19 01:01, Matthew Brost wrote:
>>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>> seems a bit odd but let us explain the reasoning below.
>>>
>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>> guaranteed to be the same completion even if targeting the same hardware
>>> engine. This is because in XE we have a firmware scheduler, the GuC,
>>> which allowed to reorder, timeslice, and preempt submissions. If a using
>>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
>>> apart as the TDR expects submission order == completion order. Using a
>>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
>>>
>>> 2. In XE submissions are done via programming a ring buffer (circular
>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>> control on the ring for free.
>>>
>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>> kthread for submission / job cleanup. This doesn't scale if a large
>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>> use a worker rather than kthread for submission / job cleanup.
>>>
>>> v2:
>>>   - (Rob Clark) Fix msm build
>>>   - Pass in run work queue
>>> v3:
>>>   - (Boris) don't have loop in worker
>>> v4:
>>>   - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>> v5:
>>>   - (Boris) default to ordered work queue
>>>
>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>  drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>  drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>  drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>  drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>  drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>  drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>  drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>  include/drm/gpu_scheduler.h                |  14 ++-
>>>  9 files changed, 79 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index e366f61c3aed..16f3cfe1574a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>  			break;
>>>  		}
>>>  
>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>  				   ring->num_hw_submission, 0,
>>>  				   timeout, adev->reset_domain->wq,
>>>  				   ring->sched_score, ring->name,
>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> index 345fec6cb1a4..618a804ddc34 100644
>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>  {
>>>  	int ret;
>>>  
>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>  			     msecs_to_jiffies(500), NULL, NULL,
>>>  			     dev_name(gpu->dev), gpu->dev);
>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>> index ffd91a5ee299..8d858aed0e56 100644
>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>  
>>>  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>  
>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>  			      lima_job_hang_limit,
>>>  			      msecs_to_jiffies(timeout), NULL,
>>>  			      NULL, name, pipe->ldev->dev);
>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>> index 40c0bc35a44c..b8865e61b40f 100644
>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>  	 /* currently managing hangcheck ourselves: */
>>>  	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>  
>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>  			num_hw_submissions, 0, sched_timeout,
>>>  			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>
>> checkpatch.pl complains here about unmatched open parens.
>>
> 
> Will fix and run checkpatch before posting next rev.
> 
>>>  	if (ret) {
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>> index 88217185e0f3..d458c2227d4f 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>  	if (!drm->sched_wq)
>>>  		return -ENOMEM;
>>>  
>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>  			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>  			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>  }
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>> index 033f5e684707..326ca1ddf1d7 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>  		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>  
>>>  		ret = drm_sched_init(&js->queue[j].sched,
>>> -				     &panfrost_sched_ops,
>>> +				     &panfrost_sched_ops, NULL,
>>>  				     nentries, 0,
>>>  				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>  				     pfdev->reset.wq,
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index e4fa62abca41..ee6281942e36 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -48,7 +48,6 @@
>>>   * through the jobs entity pointer.
>>>   */
>>>  
>>> -#include <linux/kthread.h>
>>>  #include <linux/wait.h>
>>>  #include <linux/sched.h>
>>>  #include <linux/completion.h>
>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>  	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>  }
>>>  
>>> +/**
>>> + * drm_sched_submit_queue - scheduler queue submission
>>
>> There is no verb in the description, and is not clear what
>> this function does unless one reads the code. Given that this
>> is DOC, this should be clearer here. Something like "queue
>> scheduler work to be executed" or something to that effect.
>>
> 
> Will fix.
>  
>> Coming back to this from reading the patch below, it was somewhat
>> unclear what "drm_sched_submit_queue()" does, since when reading
>> below, "submit" was being read by my mind as an adjective, as opposed
>> to a verb. Perhaps something like:
>>
>> drm_sched_queue_submit(), or
>> drm_sched_queue_exec(), or
>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>
> 
> I prefer the name as is. In this patch we have:
> 
> drm_sched_submit_queue()
> drm_sched_submit_start)
> drm_sched_submit_stop()
> drm_sched_submit_ready()
> 
> I like all these functions start with 'drm_sched_submit' which allows
> for easy searching for the functions that touch the DRM scheduler
> submission state.
> 
> With a little better doc are you fine with the names as is.

Notice the following scheme in the naming,

drm_sched_submit_queue()
drm_sched_submit_start()
drm_sched_submit_stop()
drm_sched_submit_ready()
\---+---/ \--+-/ \-+-/ 
    |        |     +---> a verb
    |        +---------> should be a noun (something in the component)
    +------------------> the kernel/software component

And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
like this:

drm_sched_submit_enqueue()

And using "submit" as the noun of the component is a bit cringy,
since "submit" is really a verb, and it's cringy to make it a "state"
or an "object" we operate on in the DRM Scheduler. "Submission" is
a noun, but "submission enqueue/start/stop/ready" doesn't sound
very well thought out. "Submission" really is what the work-queue
does.

I'd rather it be a real object, like for instance,

drm_sched_wqueue_enqueue()
drm_sched_wqueue_start()
drm_sched_wqueue_stop()
drm_sched_wqueue_ready()

Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
and the last word, as the verb, is the action we're performing on the object, i.e. the work-queue.
Plus, all these functions, which actually do operate on work-queues directly or indirectly,
are new, so it's a win-win naming scheme.

I think that that would be most likeable.
Matthew Brost Oct. 5, 2023, 3:19 p.m. UTC | #4
On Thu, Oct 05, 2023 at 12:13:01AM -0400, Luben Tuikov wrote:
> On 2023-10-04 23:33, Matthew Brost wrote:
> > On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
> >> Hi,
> >>
> >> On 2023-09-19 01:01, Matthew Brost wrote:
> >>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
> >>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
> >>> seems a bit odd but let us explain the reasoning below.
> >>>
> >>> 1. In XE the submission order from multiple drm_sched_entity is not
> >>> guaranteed to be the same completion even if targeting the same hardware
> >>> engine. This is because in XE we have a firmware scheduler, the GuC,
> >>> which allowed to reorder, timeslice, and preempt submissions. If a using
> >>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
> >>> apart as the TDR expects submission order == completion order. Using a
> >>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
> >>>
> >>> 2. In XE submissions are done via programming a ring buffer (circular
> >>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
> >>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
> >>> control on the ring for free.
> >>>
> >>> A problem with this design is currently a drm_gpu_scheduler uses a
> >>> kthread for submission / job cleanup. This doesn't scale if a large
> >>> number of drm_gpu_scheduler are used. To work around the scaling issue,
> >>> use a worker rather than kthread for submission / job cleanup.
> >>>
> >>> v2:
> >>>   - (Rob Clark) Fix msm build
> >>>   - Pass in run work queue
> >>> v3:
> >>>   - (Boris) don't have loop in worker
> >>> v4:
> >>>   - (Tvrtko) break out submit ready, stop, start helpers into own patch
> >>> v5:
> >>>   - (Boris) default to ordered work queue
> >>>
> >>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
> >>>  drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
> >>>  drivers/gpu/drm/lima/lima_sched.c          |   2 +-
> >>>  drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
> >>>  drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
> >>>  drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
> >>>  drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
> >>>  drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
> >>>  include/drm/gpu_scheduler.h                |  14 ++-
> >>>  9 files changed, 79 insertions(+), 75 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index e366f61c3aed..16f3cfe1574a 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
> >>>  			break;
> >>>  		}
> >>>  
> >>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
> >>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
> >>>  				   ring->num_hw_submission, 0,
> >>>  				   timeout, adev->reset_domain->wq,
> >>>  				   ring->sched_score, ring->name,
> >>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >>> index 345fec6cb1a4..618a804ddc34 100644
> >>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> >>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
> >>>  {
> >>>  	int ret;
> >>>  
> >>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
> >>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
> >>>  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> >>>  			     msecs_to_jiffies(500), NULL, NULL,
> >>>  			     dev_name(gpu->dev), gpu->dev);
> >>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> >>> index ffd91a5ee299..8d858aed0e56 100644
> >>> --- a/drivers/gpu/drm/lima/lima_sched.c
> >>> +++ b/drivers/gpu/drm/lima/lima_sched.c
> >>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> >>>  
> >>>  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
> >>>  
> >>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> >>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
> >>>  			      lima_job_hang_limit,
> >>>  			      msecs_to_jiffies(timeout), NULL,
> >>>  			      NULL, name, pipe->ldev->dev);
> >>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> >>> index 40c0bc35a44c..b8865e61b40f 100644
> >>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> >>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> >>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
> >>>  	 /* currently managing hangcheck ourselves: */
> >>>  	sched_timeout = MAX_SCHEDULE_TIMEOUT;
> >>>  
> >>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
> >>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
> >>>  			num_hw_submissions, 0, sched_timeout,
> >>>  			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
> >>
> >> checkpatch.pl complains here about unmatched open parens.
> >>
> > 
> > Will fix and run checkpatch before posting next rev.
> > 
> >>>  	if (ret) {
> >>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> >>> index 88217185e0f3..d458c2227d4f 100644
> >>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> >>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> >>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
> >>>  	if (!drm->sched_wq)
> >>>  		return -ENOMEM;
> >>>  
> >>> -	return drm_sched_init(sched, &nouveau_sched_ops,
> >>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
> >>>  			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
> >>>  			      NULL, NULL, "nouveau_sched", drm->dev->dev);
> >>>  }
> >>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> >>> index 033f5e684707..326ca1ddf1d7 100644
> >>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> >>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> >>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
> >>>  		js->queue[j].fence_context = dma_fence_context_alloc(1);
> >>>  
> >>>  		ret = drm_sched_init(&js->queue[j].sched,
> >>> -				     &panfrost_sched_ops,
> >>> +				     &panfrost_sched_ops, NULL,
> >>>  				     nentries, 0,
> >>>  				     msecs_to_jiffies(JOB_TIMEOUT_MS),
> >>>  				     pfdev->reset.wq,
> >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> >>> index e4fa62abca41..ee6281942e36 100644
> >>> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >>> @@ -48,7 +48,6 @@
> >>>   * through the jobs entity pointer.
> >>>   */
> >>>  
> >>> -#include <linux/kthread.h>
> >>>  #include <linux/wait.h>
> >>>  #include <linux/sched.h>
> >>>  #include <linux/completion.h>
> >>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> >>>  	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
> >>>  }
> >>>  
> >>> +/**
> >>> + * drm_sched_submit_queue - scheduler queue submission
> >>
> >> There is no verb in the description, and is not clear what
> >> this function does unless one reads the code. Given that this
> >> is DOC, this should be clearer here. Something like "queue
> >> scheduler work to be executed" or something to that effect.
> >>
> > 
> > Will fix.
> >  
> >> Coming back to this from reading the patch below, it was somewhat
> >> unclear what "drm_sched_submit_queue()" does, since when reading
> >> below, "submit" was being read by my mind as an adjective, as opposed
> >> to a verb. Perhaps something like:
> >>
> >> drm_sched_queue_submit(), or
> >> drm_sched_queue_exec(), or
> >> drm_sched_queue_push(), or something to that effect. You pick. :-)
> >>
> > 
> > I prefer the name as is. In this patch we have:
> > 
> > drm_sched_submit_queue()
> > drm_sched_submit_start)
> > drm_sched_submit_stop()
> > drm_sched_submit_ready()
> > 
> > I like all these functions start with 'drm_sched_submit' which allows
> > for easy searching for the functions that touch the DRM scheduler
> > submission state.
> > 
> > With a little better doc are you fine with the names as is.
> 
> Notice the following scheme in the naming,
> 
> drm_sched_submit_queue()
> drm_sched_submit_start)
> drm_sched_submit_stop()
> drm_sched_submit_ready()
> \---+---/ \--+-/ \-+-/ 
>     |        |     +---> a verb
>     |        +---------> should be a noun (something in the component)
>     +------------------> the kernel/software component
> 
> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
> like this:
> 
> drm_sched_submit_enqueue()
> 
> And using "submit" as the noun of the component is a bit cringy,
> since "submit" is really a verb, and it's cringy to make it a "state"
> or an "object" we operate on in the DRM Scheduler. "Submission" is
> a noun, but "submission enqueue/start/stop/ready" doesn't sound
> very well thought out. "Submission" really is what the work-queue
> does.
> 
> I'd rather it be a real object, like for instance,
> 
> drm_sched_wqueue_enqueue()
> drm_sched_wqueue_start)
> drm_sched_wqueue_stop()
> drm_sched_wqueue_ready()
> 
> Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
> and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
> Plus, all these functions actually do operate on work-queues, directly or indirectly, 
> are new, so it's a win-win naming scheme.
> 
> I think that that would be most likeable.

Thanks for the detailed explanation. I can adjust the names in the next rev.

Matt

> -- 
> Regards,
> Luben
>
Tvrtko Ursulin Oct. 6, 2023, 7:59 a.m. UTC | #5
On 05/10/2023 05:13, Luben Tuikov wrote:
> On 2023-10-04 23:33, Matthew Brost wrote:
>> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>>> Hi,
>>>
>>> On 2023-09-19 01:01, Matthew Brost wrote:
>>>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
>>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>>> seems a bit odd but let us explain the reasoning below.
>>>>
>>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>>> guaranteed to be the same completion even if targeting the same hardware
>>>> engine. This is because in XE we have a firmware scheduler, the GuC,
>>>> which allowed to reorder, timeslice, and preempt submissions. If a using
>>>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
>>>> apart as the TDR expects submission order == completion order. Using a
>>>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
>>>>
>>>> 2. In XE submissions are done via programming a ring buffer (circular
>>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>>> control on the ring for free.
>>>>
>>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>>> kthread for submission / job cleanup. This doesn't scale if a large
>>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>>> use a worker rather than kthread for submission / job cleanup.
>>>>
>>>> v2:
>>>>    - (Rob Clark) Fix msm build
>>>>    - Pass in run work queue
>>>> v3:
>>>>    - (Boris) don't have loop in worker
>>>> v4:
>>>>    - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>>> v5:
>>>>    - (Boris) default to ordered work queue
>>>>
>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>>   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>>   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>>   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>>   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>>   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>>   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>>   include/drm/gpu_scheduler.h                |  14 ++-
>>>>   9 files changed, 79 insertions(+), 75 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index e366f61c3aed..16f3cfe1574a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>>   			break;
>>>>   		}
>>>>   
>>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>>   				   ring->num_hw_submission, 0,
>>>>   				   timeout, adev->reset_domain->wq,
>>>>   				   ring->sched_score, ring->name,
>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>> index 345fec6cb1a4..618a804ddc34 100644
>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>>   {
>>>>   	int ret;
>>>>   
>>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>>   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>>   			     msecs_to_jiffies(500), NULL, NULL,
>>>>   			     dev_name(gpu->dev), gpu->dev);
>>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>>> index ffd91a5ee299..8d858aed0e56 100644
>>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>>   
>>>>   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>>   
>>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>>   			      lima_job_hang_limit,
>>>>   			      msecs_to_jiffies(timeout), NULL,
>>>>   			      NULL, name, pipe->ldev->dev);
>>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>> index 40c0bc35a44c..b8865e61b40f 100644
>>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>>   	 /* currently managing hangcheck ourselves: */
>>>>   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>>   
>>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>>   			num_hw_submissions, 0, sched_timeout,
>>>>   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>>
>>> checkpatch.pl complains here about unmatched open parens.
>>>
>>
>> Will fix and run checkpatch before posting next rev.
>>
>>>>   	if (ret) {
>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>> index 88217185e0f3..d458c2227d4f 100644
>>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>>   	if (!drm->sched_wq)
>>>>   		return -ENOMEM;
>>>>   
>>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>>   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>>   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>>   }
>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>> index 033f5e684707..326ca1ddf1d7 100644
>>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>>   		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>>   
>>>>   		ret = drm_sched_init(&js->queue[j].sched,
>>>> -				     &panfrost_sched_ops,
>>>> +				     &panfrost_sched_ops, NULL,
>>>>   				     nentries, 0,
>>>>   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>>   				     pfdev->reset.wq,
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>> index e4fa62abca41..ee6281942e36 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>> @@ -48,7 +48,6 @@
>>>>    * through the jobs entity pointer.
>>>>    */
>>>>   
>>>> -#include <linux/kthread.h>
>>>>   #include <linux/wait.h>
>>>>   #include <linux/sched.h>
>>>>   #include <linux/completion.h>
>>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>>   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>>   }
>>>>   
>>>> +/**
>>>> + * drm_sched_submit_queue - scheduler queue submission
>>>
>>> There is no verb in the description, and is not clear what
>>> this function does unless one reads the code. Given that this
>>> is DOC, this should be clearer here. Something like "queue
>>> scheduler work to be executed" or something to that effect.
>>>
>>
>> Will fix.
>>   
>>> Coming back to this from reading the patch below, it was somewhat
>>> unclear what "drm_sched_submit_queue()" does, since when reading
>>> below, "submit" was being read by my mind as an adjective, as opposed
>>> to a verb. Perhaps something like:
>>>
>>> drm_sched_queue_submit(), or
>>> drm_sched_queue_exec(), or
>>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>>
>>
>> I prefer the name as is. In this patch we have:
>>
>> drm_sched_submit_queue()
>> drm_sched_submit_start)
>> drm_sched_submit_stop()
>> drm_sched_submit_ready()
>>
>> I like all these functions start with 'drm_sched_submit' which allows
>> for easy searching for the functions that touch the DRM scheduler
>> submission state.
>>
>> With a little better doc are you fine with the names as is.
> 
> Notice the following scheme in the naming,
> 
> drm_sched_submit_queue()
> drm_sched_submit_start)
> drm_sched_submit_stop()
> drm_sched_submit_ready()
> \---+---/ \--+-/ \-+-/
>      |        |     +---> a verb
>      |        +---------> should be a noun (something in the component)
>      +------------------> the kernel/software component
> 
> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
> like this:
> 
> drm_sched_submit_enqueue()
> 
> And using "submit" as the noun of the component is a bit cringy,
> since "submit" is really a verb, and it's cringy to make it a "state"
> or an "object" we operate on in the DRM Scheduler. "Submission" is
> a noun, but "submission enqueue/start/stop/ready" doesn't sound
> very well thought out. "Submission" really is what the work-queue
> does.
> 
> I'd rather it be a real object, like for instance,
> 
> drm_sched_wqueue_enqueue()
> drm_sched_wqueue_start)
> drm_sched_wqueue_stop()
> drm_sched_wqueue_ready()
> 
> Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
> and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
> Plus, all these functions actually do operate on work-queues, directly or indirectly,
> are new, so it's a win-win naming scheme.
> 
> I think that that would be most likeable.

FWIW I was suggesting not to encode the fact submit queue is implemented 
with a workqueue in the API name. IMO it would be nicer and less 
maintenance churn should something change if the external components 
can be isolated from that detail.

drm_sched_submit_queue_$verb? If not viewed as too verbose...

Regards,

Tvrtko
Matthew Brost Oct. 6, 2023, 3:14 p.m. UTC | #6
On Fri, Oct 06, 2023 at 08:59:15AM +0100, Tvrtko Ursulin wrote:
> 
> On 05/10/2023 05:13, Luben Tuikov wrote:
> > On 2023-10-04 23:33, Matthew Brost wrote:
> > > On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
> > > > Hi,
> > > > 
> > > > On 2023-09-19 01:01, Matthew Brost wrote:
> > > > > In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
> > > > > mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
> > > > > seems a bit odd but let us explain the reasoning below.
> > > > > 
> > > > > 1. In XE the submission order from multiple drm_sched_entity is not
> > > > > guaranteed to be the same completion even if targeting the same hardware
> > > > > engine. This is because in XE we have a firmware scheduler, the GuC,
> > > > > which allowed to reorder, timeslice, and preempt submissions. If a using
> > > > > shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
> > > > > apart as the TDR expects submission order == completion order. Using a
> > > > > dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
> > > > > 
> > > > > 2. In XE submissions are done via programming a ring buffer (circular
> > > > > buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
> > > > > limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
> > > > > control on the ring for free.
> > > > > 
> > > > > A problem with this design is currently a drm_gpu_scheduler uses a
> > > > > kthread for submission / job cleanup. This doesn't scale if a large
> > > > > number of drm_gpu_scheduler are used. To work around the scaling issue,
> > > > > use a worker rather than kthread for submission / job cleanup.
> > > > > 
> > > > > v2:
> > > > >    - (Rob Clark) Fix msm build
> > > > >    - Pass in run work queue
> > > > > v3:
> > > > >    - (Boris) don't have loop in worker
> > > > > v4:
> > > > >    - (Tvrtko) break out submit ready, stop, start helpers into own patch
> > > > > v5:
> > > > >    - (Boris) default to ordered work queue
> > > > > 
> > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > > > ---
> > > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
> > > > >   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
> > > > >   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
> > > > >   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
> > > > >   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
> > > > >   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
> > > > >   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
> > > > >   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
> > > > >   include/drm/gpu_scheduler.h                |  14 ++-
> > > > >   9 files changed, 79 insertions(+), 75 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > index e366f61c3aed..16f3cfe1574a 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
> > > > >   			break;
> > > > >   		}
> > > > > -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
> > > > > +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
> > > > >   				   ring->num_hw_submission, 0,
> > > > >   				   timeout, adev->reset_domain->wq,
> > > > >   				   ring->sched_score, ring->name,
> > > > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > index 345fec6cb1a4..618a804ddc34 100644
> > > > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
> > > > >   {
> > > > >   	int ret;
> > > > > -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
> > > > > +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
> > > > >   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> > > > >   			     msecs_to_jiffies(500), NULL, NULL,
> > > > >   			     dev_name(gpu->dev), gpu->dev);
> > > > > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> > > > > index ffd91a5ee299..8d858aed0e56 100644
> > > > > --- a/drivers/gpu/drm/lima/lima_sched.c
> > > > > +++ b/drivers/gpu/drm/lima/lima_sched.c
> > > > > @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> > > > >   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
> > > > > -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> > > > > +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
> > > > >   			      lima_job_hang_limit,
> > > > >   			      msecs_to_jiffies(timeout), NULL,
> > > > >   			      NULL, name, pipe->ldev->dev);
> > > > > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > index 40c0bc35a44c..b8865e61b40f 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
> > > > >   	 /* currently managing hangcheck ourselves: */
> > > > >   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
> > > > > -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
> > > > > +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
> > > > >   			num_hw_submissions, 0, sched_timeout,
> > > > >   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
> > > > 
> > > > checkpatch.pl complains here about unmatched open parens.
> > > > 
> > > 
> > > Will fix and run checkpatch before posting next rev.
> > > 
> > > > >   	if (ret) {
> > > > > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > index 88217185e0f3..d458c2227d4f 100644
> > > > > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
> > > > >   	if (!drm->sched_wq)
> > > > >   		return -ENOMEM;
> > > > > -	return drm_sched_init(sched, &nouveau_sched_ops,
> > > > > +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
> > > > >   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
> > > > >   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
> > > > >   }
> > > > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > index 033f5e684707..326ca1ddf1d7 100644
> > > > > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
> > > > >   		js->queue[j].fence_context = dma_fence_context_alloc(1);
> > > > >   		ret = drm_sched_init(&js->queue[j].sched,
> > > > > -				     &panfrost_sched_ops,
> > > > > +				     &panfrost_sched_ops, NULL,
> > > > >   				     nentries, 0,
> > > > >   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
> > > > >   				     pfdev->reset.wq,
> > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > index e4fa62abca41..ee6281942e36 100644
> > > > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > @@ -48,7 +48,6 @@
> > > > >    * through the jobs entity pointer.
> > > > >    */
> > > > > -#include <linux/kthread.h>
> > > > >   #include <linux/wait.h>
> > > > >   #include <linux/sched.h>
> > > > >   #include <linux/completion.h>
> > > > > @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> > > > >   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
> > > > >   }
> > > > > +/**
> > > > > + * drm_sched_submit_queue - scheduler queue submission
> > > > 
> > > > There is no verb in the description, and is not clear what
> > > > this function does unless one reads the code. Given that this
> > > > is DOC, this should be clearer here. Something like "queue
> > > > scheduler work to be executed" or something to that effect.
> > > > 
> > > 
> > > Will fix.
> > > > Coming back to this from reading the patch below, it was somewhat
> > > > unclear what "drm_sched_submit_queue()" does, since when reading
> > > > below, "submit" was being read by my mind as an adjective, as opposed
> > > > to a verb. Perhaps something like:
> > > > 
> > > > drm_sched_queue_submit(), or
> > > > drm_sched_queue_exec(), or
> > > > drm_sched_queue_push(), or something to that effect. You pick. :-)
> > > > 
> > > 
> > > I prefer the name as is. In this patch we have:
> > > 
> > > drm_sched_submit_queue()
> > > drm_sched_submit_start)
> > > drm_sched_submit_stop()
> > > drm_sched_submit_ready()
> > > 
> > > I like all these functions start with 'drm_sched_submit' which allows
> > > for easy searching for the functions that touch the DRM scheduler
> > > submission state.
> > > 
> > > With a little better doc are you fine with the names as is.
> > 
> > Notice the following scheme in the naming,
> > 
> > drm_sched_submit_queue()
> > drm_sched_submit_start)
> > drm_sched_submit_stop()
> > drm_sched_submit_ready()
> > \---+---/ \--+-/ \-+-/
> >      |        |     +---> a verb
> >      |        +---------> should be a noun (something in the component)
> >      +------------------> the kernel/software component
> > 
> > And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
> > like this:
> > 
> > drm_sched_submit_enqueue()
> > 
> > And using "submit" as the noun of the component is a bit cringy,
> > since "submit" is really a verb, and it's cringy to make it a "state"
> > or an "object" we operate on in the DRM Scheduler. "Submission" is
> > a noun, but "submission enqueue/start/stop/ready" doesn't sound
> > very well thought out. "Submission" really is what the work-queue
> > does.
> > 
> > I'd rather it be a real object, like for instance,
> > 
> > drm_sched_wqueue_enqueue()
> > drm_sched_wqueue_start)
> > drm_sched_wqueue_stop()
> > drm_sched_wqueue_ready()
> > 

How about:

drm_sched_submission_enqueue()
drm_sched_submission_start()
drm_sched_submission_stop()
drm_sched_submission_ready()

Matt

> > Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
> > and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
> > Plus, all these functions actually do operate on work-queues, directly or indirectly,
> > are new, so it's a win-win naming scheme.
> > 
> > I think that that would be most likeable.
> 
> FWIW I was suggesting not to encode the fact submit queue is implemented
> with a workqueue in the API name. IMO it would be nicer and less maintenance
> > churn should something change if the external components can be isolated
> from that detail.
> 
> drm_sched_submit_queue_$verb? If not viewed as too verbose...
> 
> Regards,
> 
> Tvrtko
Matthew Brost Oct. 6, 2023, 11:43 p.m. UTC | #7
On Fri, Oct 06, 2023 at 03:14:04PM +0000, Matthew Brost wrote:
> On Fri, Oct 06, 2023 at 08:59:15AM +0100, Tvrtko Ursulin wrote:
> > 
> > On 05/10/2023 05:13, Luben Tuikov wrote:
> > > On 2023-10-04 23:33, Matthew Brost wrote:
> > > > On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
> > > > > Hi,
> > > > > 
> > > > > On 2023-09-19 01:01, Matthew Brost wrote:
> > > > > > In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
> > > > > > mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
> > > > > > seems a bit odd but let us explain the reasoning below.
> > > > > > 
> > > > > > 1. In XE the submission order from multiple drm_sched_entity is not
> > > > > > guaranteed to be the same completion even if targeting the same hardware
> > > > > > engine. This is because in XE we have a firmware scheduler, the GuC,
> > > > > > which allowed to reorder, timeslice, and preempt submissions. If a using
> > > > > > shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
> > > > > > apart as the TDR expects submission order == completion order. Using a
> > > > > > dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
> > > > > > 
> > > > > > 2. In XE submissions are done via programming a ring buffer (circular
> > > > > > buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
> > > > > > limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
> > > > > > control on the ring for free.
> > > > > > 
> > > > > > A problem with this design is currently a drm_gpu_scheduler uses a
> > > > > > kthread for submission / job cleanup. This doesn't scale if a large
> > > > > > number of drm_gpu_scheduler are used. To work around the scaling issue,
> > > > > > use a worker rather than kthread for submission / job cleanup.
> > > > > > 
> > > > > > v2:
> > > > > >    - (Rob Clark) Fix msm build
> > > > > >    - Pass in run work queue
> > > > > > v3:
> > > > > >    - (Boris) don't have loop in worker
> > > > > > v4:
> > > > > >    - (Tvrtko) break out submit ready, stop, start helpers into own patch
> > > > > > v5:
> > > > > >    - (Boris) default to ordered work queue
> > > > > > 
> > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > > > > ---
> > > > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
> > > > > >   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
> > > > > >   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
> > > > > >   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
> > > > > >   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
> > > > > >   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
> > > > > >   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
> > > > > >   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
> > > > > >   include/drm/gpu_scheduler.h                |  14 ++-
> > > > > >   9 files changed, 79 insertions(+), 75 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > index e366f61c3aed..16f3cfe1574a 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
> > > > > >   			break;
> > > > > >   		}
> > > > > > -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
> > > > > > +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
> > > > > >   				   ring->num_hw_submission, 0,
> > > > > >   				   timeout, adev->reset_domain->wq,
> > > > > >   				   ring->sched_score, ring->name,
> > > > > > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > > index 345fec6cb1a4..618a804ddc34 100644
> > > > > > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> > > > > > @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
> > > > > >   {
> > > > > >   	int ret;
> > > > > > -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
> > > > > > +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
> > > > > >   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> > > > > >   			     msecs_to_jiffies(500), NULL, NULL,
> > > > > >   			     dev_name(gpu->dev), gpu->dev);
> > > > > > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> > > > > > index ffd91a5ee299..8d858aed0e56 100644
> > > > > > --- a/drivers/gpu/drm/lima/lima_sched.c
> > > > > > +++ b/drivers/gpu/drm/lima/lima_sched.c
> > > > > > @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> > > > > >   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
> > > > > > -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> > > > > > +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
> > > > > >   			      lima_job_hang_limit,
> > > > > >   			      msecs_to_jiffies(timeout), NULL,
> > > > > >   			      NULL, name, pipe->ldev->dev);
> > > > > > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > > index 40c0bc35a44c..b8865e61b40f 100644
> > > > > > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > > > > > @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
> > > > > >   	 /* currently managing hangcheck ourselves: */
> > > > > >   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
> > > > > > -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
> > > > > > +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
> > > > > >   			num_hw_submissions, 0, sched_timeout,
> > > > > >   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
> > > > > 
> > > > > checkpatch.pl complains here about unmatched open parens.
> > > > > 
> > > > 
> > > > Will fix and run checkpatch before posting next rev.
> > > > 
> > > > > >   	if (ret) {
> > > > > > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > > index 88217185e0f3..d458c2227d4f 100644
> > > > > > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > > > > @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
> > > > > >   	if (!drm->sched_wq)
> > > > > >   		return -ENOMEM;
> > > > > > -	return drm_sched_init(sched, &nouveau_sched_ops,
> > > > > > +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
> > > > > >   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
> > > > > >   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
> > > > > >   }
> > > > > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > > index 033f5e684707..326ca1ddf1d7 100644
> > > > > > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > > > > > @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
> > > > > >   		js->queue[j].fence_context = dma_fence_context_alloc(1);
> > > > > >   		ret = drm_sched_init(&js->queue[j].sched,
> > > > > > -				     &panfrost_sched_ops,
> > > > > > +				     &panfrost_sched_ops, NULL,
> > > > > >   				     nentries, 0,
> > > > > >   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
> > > > > >   				     pfdev->reset.wq,
> > > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > index e4fa62abca41..ee6281942e36 100644
> > > > > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > @@ -48,7 +48,6 @@
> > > > > >    * through the jobs entity pointer.
> > > > > >    */
> > > > > > -#include <linux/kthread.h>
> > > > > >   #include <linux/wait.h>
> > > > > >   #include <linux/sched.h>
> > > > > >   #include <linux/completion.h>
> > > > > > @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
> > > > > >   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
> > > > > >   }
> > > > > > +/**
> > > > > > + * drm_sched_submit_queue - scheduler queue submission
> > > > > 
> > > > > There is no verb in the description, and is not clear what
> > > > > this function does unless one reads the code. Given that this
> > > > > is DOC, this should be clearer here. Something like "queue
> > > > > scheduler work to be executed" or something to that effect.
> > > > > 
> > > > 
> > > > Will fix.
> > > > > Coming back to this from reading the patch below, it was somewhat
> > > > > unclear what "drm_sched_submit_queue()" does, since when reading
> > > > > below, "submit" was being read by my mind as an adjective, as opposed
> > > > > to a verb. Perhaps something like:
> > > > > 
> > > > > drm_sched_queue_submit(), or
> > > > > drm_sched_queue_exec(), or
> > > > > drm_sched_queue_push(), or something to that effect. You pick. :-)
> > > > > 
> > > > 
> > > > I prefer the name as is. In this patch we have:
> > > > 
> > > > drm_sched_submit_queue()
> > > > drm_sched_submit_start)
> > > > drm_sched_submit_stop()
> > > > drm_sched_submit_ready()
> > > > 
> > > > I like all these functions start with 'drm_sched_submit' which allows
> > > > for easy searching for the functions that touch the DRM scheduler
> > > > submission state.
> > > > 
> > > > With a little better doc are you fine with the names as is.
> > > 
> > > Notice the following scheme in the naming,
> > > 
> > > drm_sched_submit_queue()
> > > drm_sched_submit_start)
> > > drm_sched_submit_stop()
> > > drm_sched_submit_ready()
> > > \---+---/ \--+-/ \-+-/
> > >      |        |     +---> a verb
> > >      |        +---------> should be a noun (something in the component)
> > >      +------------------> the kernel/software component
> > > 
> > > And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
> > > like this:
> > > 
> > > drm_sched_submit_enqueue()
> > > 
> > > And using "submit" as the noun of the component is a bit cringy,
> > > since "submit" is really a verb, and it's cringy to make it a "state"
> > > or an "object" we operate on in the DRM Scheduler. "Submission" is
> > > a noun, but "submission enqueue/start/stop/ready" doesn't sound
> > > very well thought out. "Submission" really is what the work-queue
> > > does.
> > > 
> > > I'd rather it be a real object, like for instance,
> > > 
> > > drm_sched_wqueue_enqueue()
> > > drm_sched_wqueue_start)
> > > drm_sched_wqueue_stop()
> > > drm_sched_wqueue_ready()
> > > 
> 
> How about:
> 
> drm_sched_submission_enqueue()
> drm_sched_submission_start)
> drm_sched_submission_stop()
> drm_sched_submission_ready()
> 
> Matt

Ignore this, read Tvrtko's comment and not Luben's fully.

I prefer drm_sched_wqueue over drm_sched_submit_queue as submit queue is
a made of thing. drm_sched_submission would be my top choice but if Luben
is opposed will go with drm_sched_wqueue in next rev.

Matt

> 
> > > Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
> > > and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
> > > Plus, all these functions actually do operate on work-queues, directly or indirectly,
> > > are new, so it's a win-win naming scheme.
> > > 
> > > I think that that would be most likeable.
> > 
> > FWIW I was suggesting not to encode the fact submit queue is implemented
> > with a workqueue in the API name. IMO it would be nicer and less maintenance
> > churn should something change if the external components can be isolated
> > from that detail.
> > 
> > drm_sched_submit_queue_$verb? If not viewed as too verbose...
> > 
> > Regards,
> > 
> > Tvrtko
Tvrtko Ursulin Oct. 9, 2023, 8:35 a.m. UTC | #8
On 07/10/2023 00:43, Matthew Brost wrote:
> On Fri, Oct 06, 2023 at 03:14:04PM +0000, Matthew Brost wrote:
>> On Fri, Oct 06, 2023 at 08:59:15AM +0100, Tvrtko Ursulin wrote:
>>>
>>> On 05/10/2023 05:13, Luben Tuikov wrote:
>>>> On 2023-10-04 23:33, Matthew Brost wrote:
>>>>> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023-09-19 01:01, Matthew Brost wrote:
>>>>>>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
>>>>>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>>>>>> seems a bit odd but let us explain the reasoning below.
>>>>>>>
>>>>>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>>>>>> guaranteed to be the same completion even if targeting the same hardware
>>>>>>> engine. This is because in XE we have a firmware scheduler, the GuC,
>>>>>>> which allowed to reorder, timeslice, and preempt submissions. If a using
>>>>>>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
>>>>>>> apart as the TDR expects submission order == completion order. Using a
>>>>>>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
>>>>>>>
>>>>>>> 2. In XE submissions are done via programming a ring buffer (circular
>>>>>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>>>>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>>>>>> control on the ring for free.
>>>>>>>
>>>>>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>>>>>> kthread for submission / job cleanup. This doesn't scale if a large
>>>>>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>>>>>> use a worker rather than kthread for submission / job cleanup.
>>>>>>>
>>>>>>> v2:
>>>>>>>     - (Rob Clark) Fix msm build
>>>>>>>     - Pass in run work queue
>>>>>>> v3:
>>>>>>>     - (Boris) don't have loop in worker
>>>>>>> v4:
>>>>>>>     - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>>>>>> v5:
>>>>>>>     - (Boris) default to ordered work queue
>>>>>>>
>>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>>>> ---
>>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>>>>>    drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>>>>>    drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>>>>>    drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>>>>>    drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>>>>>    drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>>>>>    drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>>>>>    drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>>>>>    include/drm/gpu_scheduler.h                |  14 ++-
>>>>>>>    9 files changed, 79 insertions(+), 75 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> index e366f61c3aed..16f3cfe1574a 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>>>>>    			break;
>>>>>>>    		}
>>>>>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>>>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>>>>>    				   ring->num_hw_submission, 0,
>>>>>>>    				   timeout, adev->reset_domain->wq,
>>>>>>>    				   ring->sched_score, ring->name,
>>>>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> index 345fec6cb1a4..618a804ddc34 100644
>>>>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>>>>>    {
>>>>>>>    	int ret;
>>>>>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>>>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>>>>>    			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>>>>>    			     msecs_to_jiffies(500), NULL, NULL,
>>>>>>>    			     dev_name(gpu->dev), gpu->dev);
>>>>>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> index ffd91a5ee299..8d858aed0e56 100644
>>>>>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>>>>>    	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>>>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>>>>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>>>>>    			      lima_job_hang_limit,
>>>>>>>    			      msecs_to_jiffies(timeout), NULL,
>>>>>>>    			      NULL, name, pipe->ldev->dev);
>>>>>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> index 40c0bc35a44c..b8865e61b40f 100644
>>>>>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>>>>>    	 /* currently managing hangcheck ourselves: */
>>>>>>>    	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>>>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>>>>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>>>>>    			num_hw_submissions, 0, sched_timeout,
>>>>>>>    			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>>>>>
>>>>>> checkpatch.pl complains here about unmatched open parens.
>>>>>>
>>>>>
>>>>> Will fix and run checkpatch before posting next rev.
>>>>>
>>>>>>>    	if (ret) {
>>>>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> index 88217185e0f3..d458c2227d4f 100644
>>>>>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>>>>>    	if (!drm->sched_wq)
>>>>>>>    		return -ENOMEM;
>>>>>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>>>>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>>>>>    			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>>>>>    			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>>>>>    }
>>>>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> index 033f5e684707..326ca1ddf1d7 100644
>>>>>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>>>>>    		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>>>>>    		ret = drm_sched_init(&js->queue[j].sched,
>>>>>>> -				     &panfrost_sched_ops,
>>>>>>> +				     &panfrost_sched_ops, NULL,
>>>>>>>    				     nentries, 0,
>>>>>>>    				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>>>>>    				     pfdev->reset.wq,
>>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> index e4fa62abca41..ee6281942e36 100644
>>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> @@ -48,7 +48,6 @@
>>>>>>>     * through the jobs entity pointer.
>>>>>>>     */
>>>>>>> -#include <linux/kthread.h>
>>>>>>>    #include <linux/wait.h>
>>>>>>>    #include <linux/sched.h>
>>>>>>>    #include <linux/completion.h>
>>>>>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>>>>>    	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>>>>>    }
>>>>>>> +/**
>>>>>>> + * drm_sched_submit_queue - scheduler queue submission
>>>>>>
>>>>>> There is no verb in the description, and is not clear what
>>>>>> this function does unless one reads the code. Given that this
>>>>>> is DOC, this should be clearer here. Something like "queue
>>>>>> scheduler work to be executed" or something to that effect.
>>>>>>
>>>>>
>>>>> Will fix.
>>>>>> Coming back to this from reading the patch below, it was somewhat
>>>>>> unclear what "drm_sched_submit_queue()" does, since when reading
>>>>>> below, "submit" was being read by my mind as an adjective, as opposed
>>>>>> to a verb. Perhaps something like:
>>>>>>
>>>>>> drm_sched_queue_submit(), or
>>>>>> drm_sched_queue_exec(), or
>>>>>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>>>>>
>>>>>
>>>>> I prefer the name as is. In this patch we have:
>>>>>
>>>>> drm_sched_submit_queue()
>>>>> drm_sched_submit_start)
>>>>> drm_sched_submit_stop()
>>>>> drm_sched_submit_ready()
>>>>>
>>>>> I like all these functions start with 'drm_sched_submit' which allows
>>>>> for easy searching for the functions that touch the DRM scheduler
>>>>> submission state.
>>>>>
>>>>> With a little better doc are you fine with the names as is.
>>>>
>>>> Notice the following scheme in the naming,
>>>>
>>>> drm_sched_submit_queue()
>>>> drm_sched_submit_start)
>>>> drm_sched_submit_stop()
>>>> drm_sched_submit_ready()
>>>> \---+---/ \--+-/ \-+-/
>>>>       |        |     +---> a verb
>>>>       |        +---------> should be a noun (something in the component)
>>>>       +------------------> the kernel/software component
>>>>
>>>> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
>>>> like this:
>>>>
>>>> drm_sched_submit_enqueue()
>>>>
>>>> And using "submit" as the noun of the component is a bit cringy,
>>>> since "submit" is really a verb, and it's cringy to make it a "state"
>>>> or an "object" we operate on in the DRM Scheduler. "Submission" is
>>>> a noun, but "submission enqueue/start/stop/ready" doesn't sound
>>>> very well thought out. "Submission" really is what the work-queue
>>>> does.
>>>>
>>>> I'd rather it be a real object, like for instance,
>>>>
>>>> drm_sched_wqueue_enqueue()
>>>> drm_sched_wqueue_start)
>>>> drm_sched_wqueue_stop()
>>>> drm_sched_wqueue_ready()
>>>>
>>
>> How about:
>>
>> drm_sched_submission_enqueue()
>> drm_sched_submission_start)
>> drm_sched_submission_stop()
>> drm_sched_submission_ready()
>>
>> Matt
> 
> Ignore this, read Tvrtko commnt and not Luben's fully.
> 
> I prefer drm_sched_wqueue over drm_sched_submit_queue as submit queue is
> a made of thing. drm_sched_submission would be my top choice but if Luben
> is opposed will go with drm_sched_wqueue in next rev.

I suppose you meant "made up"? All the verbs are also then made up so I 
don't really see that as an argument why implementation detail should be 
encoded into the API naming but your call folks.

Regards,

Tvrtko

>>>> Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
>>>> and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
>>>> Plus, all these functions actually do operate on work-queues, directly or indirectly,
>>>> are new, so it's a win-win naming scheme.
>>>>
>>>> I think that that would be most likeable.
>>>
>>> FWIW I was suggesting not to encode the fact submit queue is implemented
>>> with a workqueue in the API name. IMO it would be nicer and less maintenance
>>> churn should something change if the external components can be isolated
>>> from that detail.
>>>
>>> drm_sched_submit_queue_$verb? If not viewed as too verbose...
>>>
>>> Regards,
>>>
>>> Tvrtko
Luben Tuikov Oct. 11, 2023, 11:10 p.m. UTC | #9
On 2023-10-06 03:59, Tvrtko Ursulin wrote:
> 
> On 05/10/2023 05:13, Luben Tuikov wrote:
>> On 2023-10-04 23:33, Matthew Brost wrote:
>>> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>>>> Hi,
>>>>
>>>> On 2023-09-19 01:01, Matthew Brost wrote:
>>>>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
>>>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>>>> seems a bit odd but let us explain the reasoning below.
>>>>>
>>>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>>>> guaranteed to be the same completion even if targeting the same hardware
>>>>> engine. This is because in XE we have a firmware scheduler, the GuC,
>>>>> which allowed to reorder, timeslice, and preempt submissions. If a using
>>>>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
>>>>> apart as the TDR expects submission order == completion order. Using a
>>>>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
>>>>>
>>>>> 2. In XE submissions are done via programming a ring buffer (circular
>>>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>>>> control on the ring for free.
>>>>>
>>>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>>>> kthread for submission / job cleanup. This doesn't scale if a large
>>>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>>>> use a worker rather than kthread for submission / job cleanup.
>>>>>
>>>>> v2:
>>>>>    - (Rob Clark) Fix msm build
>>>>>    - Pass in run work queue
>>>>> v3:
>>>>>    - (Boris) don't have loop in worker
>>>>> v4:
>>>>>    - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>>>> v5:
>>>>>    - (Boris) default to ordered work queue
>>>>>
>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>>>   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>>>   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>>>   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>>>   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>>>   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>>>   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>>>   include/drm/gpu_scheduler.h                |  14 ++-
>>>>>   9 files changed, 79 insertions(+), 75 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index e366f61c3aed..16f3cfe1574a 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>>>   			break;
>>>>>   		}
>>>>>   
>>>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>>>   				   ring->num_hw_submission, 0,
>>>>>   				   timeout, adev->reset_domain->wq,
>>>>>   				   ring->sched_score, ring->name,
>>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>> index 345fec6cb1a4..618a804ddc34 100644
>>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>>>   {
>>>>>   	int ret;
>>>>>   
>>>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>>>   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>>>   			     msecs_to_jiffies(500), NULL, NULL,
>>>>>   			     dev_name(gpu->dev), gpu->dev);
>>>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>>>> index ffd91a5ee299..8d858aed0e56 100644
>>>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>>>   
>>>>>   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>>>   
>>>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>>>   			      lima_job_hang_limit,
>>>>>   			      msecs_to_jiffies(timeout), NULL,
>>>>>   			      NULL, name, pipe->ldev->dev);
>>>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>> index 40c0bc35a44c..b8865e61b40f 100644
>>>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>>>   	 /* currently managing hangcheck ourselves: */
>>>>>   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>>>   
>>>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>>>   			num_hw_submissions, 0, sched_timeout,
>>>>>   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>>>
>>>> checkpatch.pl complains here about unmatched open parens.
>>>>
>>>
>>> Will fix and run checkpatch before posting next rev.
>>>
>>>>>   	if (ret) {
>>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>> index 88217185e0f3..d458c2227d4f 100644
>>>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>>>   	if (!drm->sched_wq)
>>>>>   		return -ENOMEM;
>>>>>   
>>>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>>>   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>>>   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>>>   }
>>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>> index 033f5e684707..326ca1ddf1d7 100644
>>>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>>>   		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>>>   
>>>>>   		ret = drm_sched_init(&js->queue[j].sched,
>>>>> -				     &panfrost_sched_ops,
>>>>> +				     &panfrost_sched_ops, NULL,
>>>>>   				     nentries, 0,
>>>>>   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>>>   				     pfdev->reset.wq,
>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> index e4fa62abca41..ee6281942e36 100644
>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> @@ -48,7 +48,6 @@
>>>>>    * through the jobs entity pointer.
>>>>>    */
>>>>>   
>>>>> -#include <linux/kthread.h>
>>>>>   #include <linux/wait.h>
>>>>>   #include <linux/sched.h>
>>>>>   #include <linux/completion.h>
>>>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>>>   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>>>   }
>>>>>   
>>>>> +/**
>>>>> + * drm_sched_submit_queue - scheduler queue submission
>>>>
>>>> There is no verb in the description, and is not clear what
>>>> this function does unless one reads the code. Given that this
>>>> is DOC, this should be clearer here. Something like "queue
>>>> scheduler work to be executed" or something to that effect.
>>>>
>>>
>>> Will fix.
>>>   
>>>> Coming back to this from reading the patch below, it was somewhat
>>>> unclear what "drm_sched_submit_queue()" does, since when reading
>>>> below, "submit" was being read by my mind as an adjective, as opposed
>>>> to a verb. Perhaps something like:
>>>>
>>>> drm_sched_queue_submit(), or
>>>> drm_sched_queue_exec(), or
>>>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>>>
>>>
>>> I prefer the name as is. In this patch we have:
>>>
>>> drm_sched_submit_queue()
>>> drm_sched_submit_start)
>>> drm_sched_submit_stop()
>>> drm_sched_submit_ready()
>>>
>>> I like all these functions start with 'drm_sched_submit' which allows
>>> for easy searching for the functions that touch the DRM scheduler
>>> submission state.
>>>
>>> With a little better doc are you fine with the names as is.
>>
>> Notice the following scheme in the naming,
>>
>> drm_sched_submit_queue()
>> drm_sched_submit_start)
>> drm_sched_submit_stop()
>> drm_sched_submit_ready()
>> \---+---/ \--+-/ \-+-/
>>      |        |     +---> a verb
>>      |        +---------> should be a noun (something in the component)
>>      +------------------> the kernel/software component
>>
>> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
>> like this:
>>
>> drm_sched_submit_enqueue()
>>
>> And using "submit" as the noun of the component is a bit cringy,
>> since "submit" is really a verb, and it's cringy to make it a "state"
>> or an "object" we operate on in the DRM Scheduler. "Submission" is
>> a noun, but "submission enqueue/start/stop/ready" doesn't sound
>> very well thought out. "Submission" really is what the work-queue
>> does.
>>
>> I'd rather it be a real object, like for instance,
>>
>> drm_sched_wqueue_enqueue()
>> drm_sched_wqueue_start)
>> drm_sched_wqueue_stop()
>> drm_sched_wqueue_ready()
>>
>> Which tells me that the component is the DRM Scheduler, the object is a/the work-queue,
>> and the last word as the verb, is the action we're performing on the object, i.e. the work-queue.
>> Plus, all these functions actually do operate on work-queues, directly or indirectly,
>> are new, so it's a win-win naming scheme.
>>
>> I think that that would be most likeable.
> 
> FWIW I was suggesting not to encode the fact submit queue is implemented 

No. Overengineering.

> with a workqueue in the API name. IMO it would be nicer and less 
> maintenance churn should something channge if the external components 
> can be isolated from that detail.
> 
> drm_sched_submit_queue_$verb? If not viewed as too verbose...

No.

That sounds like an unnecessary overengineering: "what if". No.
Luben Tuikov Oct. 11, 2023, 11:11 p.m. UTC | #10
On 2023-10-06 11:14, Matthew Brost wrote:
> On Fri, Oct 06, 2023 at 08:59:15AM +0100, Tvrtko Ursulin wrote:
>>
>> On 05/10/2023 05:13, Luben Tuikov wrote:
>>> On 2023-10-04 23:33, Matthew Brost wrote:
>>>> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>>>>> Hi,
>>>>>
>>>>> On 2023-09-19 01:01, Matthew Brost wrote:
>>>>>> In XE, the new Intel GPU driver, a choice has made to have a 1 to 1
>>>>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>>>>> seems a bit odd but let us explain the reasoning below.
>>>>>>
>>>>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>>>>> guaranteed to be the same completion even if targeting the same hardware
>>>>>> engine. This is because in XE we have a firmware scheduler, the GuC,
>>>>>> which allowed to reorder, timeslice, and preempt submissions. If a using
>>>>>> shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
>>>>>> apart as the TDR expects submission order == completion order. Using a
>>>>>> dedicated drm_gpu_scheduler per drm_sched_entity solve this problem.
>>>>>>
>>>>>> 2. In XE submissions are done via programming a ring buffer (circular
>>>>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>>>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>>>>> control on the ring for free.
>>>>>>
>>>>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>>>>> kthread for submission / job cleanup. This doesn't scale if a large
>>>>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>>>>> use a worker rather than kthread for submission / job cleanup.
>>>>>>
>>>>>> v2:
>>>>>>    - (Rob Clark) Fix msm build
>>>>>>    - Pass in run work queue
>>>>>> v3:
>>>>>>    - (Boris) don't have loop in worker
>>>>>> v4:
>>>>>>    - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>>>>> v5:
>>>>>>    - (Boris) default to ordered work queue
>>>>>>
>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>>>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>>>>   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>>>>   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>>>>   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>>>>   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>>>>   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>>>>   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>>>>   include/drm/gpu_scheduler.h                |  14 ++-
>>>>>>   9 files changed, 79 insertions(+), 75 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> index e366f61c3aed..16f3cfe1574a 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>>>>   			break;
>>>>>>   		}
>>>>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>>>>   				   ring->num_hw_submission, 0,
>>>>>>   				   timeout, adev->reset_domain->wq,
>>>>>>   				   ring->sched_score, ring->name,
>>>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>> index 345fec6cb1a4..618a804ddc34 100644
>>>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>>>>   {
>>>>>>   	int ret;
>>>>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>>>>   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>>>>   			     msecs_to_jiffies(500), NULL, NULL,
>>>>>>   			     dev_name(gpu->dev), gpu->dev);
>>>>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>>>>> index ffd91a5ee299..8d858aed0e56 100644
>>>>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>>>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>>>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>>>>   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>>>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>>>>   			      lima_job_hang_limit,
>>>>>>   			      msecs_to_jiffies(timeout), NULL,
>>>>>>   			      NULL, name, pipe->ldev->dev);
>>>>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>> index 40c0bc35a44c..b8865e61b40f 100644
>>>>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>>>>   	 /* currently managing hangcheck ourselves: */
>>>>>>   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>>>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>>>>   			num_hw_submissions, 0, sched_timeout,
>>>>>>   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>>>>
>>>>> checkpatch.pl complains here about unmatched open parens.
>>>>>
>>>>
>>>> Will fix and run checkpatch before posting next rev.
>>>>
>>>>>>   	if (ret) {
>>>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>> index 88217185e0f3..d458c2227d4f 100644
>>>>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>>>>   	if (!drm->sched_wq)
>>>>>>   		return -ENOMEM;
>>>>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>>>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>>>>   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>>>>   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>>>>   }
>>>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>> index 033f5e684707..326ca1ddf1d7 100644
>>>>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>>>>   		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>>>>   		ret = drm_sched_init(&js->queue[j].sched,
>>>>>> -				     &panfrost_sched_ops,
>>>>>> +				     &panfrost_sched_ops, NULL,
>>>>>>   				     nentries, 0,
>>>>>>   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>>>>   				     pfdev->reset.wq,
>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> index e4fa62abca41..ee6281942e36 100644
>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> @@ -48,7 +48,6 @@
>>>>>>    * through the jobs entity pointer.
>>>>>>    */
>>>>>> -#include <linux/kthread.h>
>>>>>>   #include <linux/wait.h>
>>>>>>   #include <linux/sched.h>
>>>>>>   #include <linux/completion.h>
>>>>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>>>>   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>>>>   }
>>>>>> +/**
>>>>>> + * drm_sched_submit_queue - scheduler queue submission
>>>>>
>>>>> There is no verb in the description, and is not clear what
>>>>> this function does unless one reads the code. Given that this
>>>>> is DOC, this should be clearer here. Something like "queue
>>>>> scheduler work to be executed" or something to that effect.
>>>>>
>>>>
>>>> Will fix.
>>>>> Coming back to this from reading the patch below, it was somewhat
>>>>> unclear what "drm_sched_submit_queue()" does, since when reading
>>>>> below, "submit" was being read by my mind as an adjective, as opposed
>>>>> to a verb. Perhaps something like:
>>>>>
>>>>> drm_sched_queue_submit(), or
>>>>> drm_sched_queue_exec(), or
>>>>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>>>>
>>>>
>>>> I prefer the name as is. In this patch we have:
>>>>
>>>> drm_sched_submit_queue()
>>>> drm_sched_submit_start)
>>>> drm_sched_submit_stop()
>>>> drm_sched_submit_ready()
>>>>
>>>> I like all these functions start with 'drm_sched_submit' which allows
>>>> for easy searching for the functions that touch the DRM scheduler
>>>> submission state.
>>>>
>>>> With a little better doc are you fine with the names as is.
>>>
>>> Notice the following scheme in the naming,
>>>
>>> drm_sched_submit_queue()
>>> drm_sched_submit_start)
>>> drm_sched_submit_stop()
>>> drm_sched_submit_ready()
>>> \---+---/ \--+-/ \-+-/
>>>      |        |     +---> a verb
>>>      |        +---------> should be a noun (something in the component)
>>>      +------------------> the kernel/software component
>>>
>>> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
>>> like this:
>>>
>>> drm_sched_submit_enqueue()
>>>
>>> And using "submit" as the noun of the component is a bit cringy,
>>> since "submit" is really a verb, and it's cringy to make it a "state"
>>> or an "object" we operate on in the DRM Scheduler. "Submission" is
>>> a noun, but "submission enqueue/start/stop/ready" doesn't sound
>>> very well thought out. "Submission" really is what the work-queue
>>> does.
>>>
>>> I'd rather it be a real object, like for instance,
>>>
>>> drm_sched_wqueue_enqueue()
>>> drm_sched_wqueue_start)
>>> drm_sched_wqueue_stop()
>>> drm_sched_wqueue_ready()
>>>
> 
> How about:
> 
> drm_sched_submission_enqueue()
> drm_sched_submission_start)
> drm_sched_submission_stop()
> drm_sched_submission_ready()

No.
Luben Tuikov Oct. 11, 2023, 11:19 p.m. UTC | #11
On 2023-10-06 19:43, Matthew Brost wrote:
> On Fri, Oct 06, 2023 at 03:14:04PM +0000, Matthew Brost wrote:
>> On Fri, Oct 06, 2023 at 08:59:15AM +0100, Tvrtko Ursulin wrote:
>>>
>>> On 05/10/2023 05:13, Luben Tuikov wrote:
>>>> On 2023-10-04 23:33, Matthew Brost wrote:
>>>>> On Tue, Sep 26, 2023 at 11:32:10PM -0400, Luben Tuikov wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023-09-19 01:01, Matthew Brost wrote:
>>>>>>> In XE, the new Intel GPU driver, a choice has been made to have a 1 to 1
>>>>>>> mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
>>>>>>> seems a bit odd but let us explain the reasoning below.
>>>>>>>
>>>>>>> 1. In XE the submission order from multiple drm_sched_entity is not
>>>>>>> guaranteed to be the same as the completion order even if targeting the
>>>>>>> same hardware engine. This is because in XE we have a firmware scheduler,
>>>>>>> the GuC, which is allowed to reorder, timeslice, and preempt submissions.
>>>>>>> If using a shared drm_gpu_scheduler across multiple drm_sched_entity, the
>>>>>>> TDR falls apart as the TDR expects submission order == completion order.
>>>>>>> Using a dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.
>>>>>>>
>>>>>>> 2. In XE submissions are done via programming a ring buffer (circular
>>>>>>> buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
>>>>>>> limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
>>>>>>> control on the ring for free.
>>>>>>>
>>>>>>> A problem with this design is currently a drm_gpu_scheduler uses a
>>>>>>> kthread for submission / job cleanup. This doesn't scale if a large
>>>>>>> number of drm_gpu_scheduler are used. To work around the scaling issue,
>>>>>>> use a worker rather than kthread for submission / job cleanup.
>>>>>>>
>>>>>>> v2:
>>>>>>>    - (Rob Clark) Fix msm build
>>>>>>>    - Pass in run work queue
>>>>>>> v3:
>>>>>>>    - (Boris) don't have loop in worker
>>>>>>> v4:
>>>>>>>    - (Tvrtko) break out submit ready, stop, start helpers into own patch
>>>>>>> v5:
>>>>>>>    - (Boris) default to ordered work queue
>>>>>>>
>>>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>>>> ---
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +-
>>>>>>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   2 +-
>>>>>>>   drivers/gpu/drm/lima/lima_sched.c          |   2 +-
>>>>>>>   drivers/gpu/drm/msm/msm_ringbuffer.c       |   2 +-
>>>>>>>   drivers/gpu/drm/nouveau/nouveau_sched.c    |   2 +-
>>>>>>>   drivers/gpu/drm/panfrost/panfrost_job.c    |   2 +-
>>>>>>>   drivers/gpu/drm/scheduler/sched_main.c     | 118 ++++++++++-----------
>>>>>>>   drivers/gpu/drm/v3d/v3d_sched.c            |  10 +-
>>>>>>>   include/drm/gpu_scheduler.h                |  14 ++-
>>>>>>>   9 files changed, 79 insertions(+), 75 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> index e366f61c3aed..16f3cfe1574a 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>>>>>>   			break;
>>>>>>>   		}
>>>>>>> -		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>>>>> +		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
>>>>>>>   				   ring->num_hw_submission, 0,
>>>>>>>   				   timeout, adev->reset_domain->wq,
>>>>>>>   				   ring->sched_score, ring->name,
>>>>>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> index 345fec6cb1a4..618a804ddc34 100644
>>>>>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>>>>>> @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>>>>>   {
>>>>>>>   	int ret;
>>>>>>> -	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>>>>> +	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
>>>>>>>   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>>>>>>   			     msecs_to_jiffies(500), NULL, NULL,
>>>>>>>   			     dev_name(gpu->dev), gpu->dev);
>>>>>>> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> index ffd91a5ee299..8d858aed0e56 100644
>>>>>>> --- a/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> +++ b/drivers/gpu/drm/lima/lima_sched.c
>>>>>>> @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>>>>>>>   	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>>>>>>> -	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
>>>>>>> +	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
>>>>>>>   			      lima_job_hang_limit,
>>>>>>>   			      msecs_to_jiffies(timeout), NULL,
>>>>>>>   			      NULL, name, pipe->ldev->dev);
>>>>>>> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> index 40c0bc35a44c..b8865e61b40f 100644
>>>>>>> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
>>>>>>> @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>>>>>>>   	 /* currently managing hangcheck ourselves: */
>>>>>>>   	sched_timeout = MAX_SCHEDULE_TIMEOUT;
>>>>>>> -	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
>>>>>>> +	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
>>>>>>>   			num_hw_submissions, 0, sched_timeout,
>>>>>>>   			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
>>>>>>
>>>>>> checkpatch.pl complains here about unmatched open parens.
>>>>>>
>>>>>
>>>>> Will fix and run checkpatch before posting next rev.
>>>>>
>>>>>>>   	if (ret) {
>>>>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> index 88217185e0f3..d458c2227d4f 100644
>>>>>>> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
>>>>>>> @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
>>>>>>>   	if (!drm->sched_wq)
>>>>>>>   		return -ENOMEM;
>>>>>>> -	return drm_sched_init(sched, &nouveau_sched_ops,
>>>>>>> +	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
>>>>>>>   			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
>>>>>>>   			      NULL, NULL, "nouveau_sched", drm->dev->dev);
>>>>>>>   }
>>>>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> index 033f5e684707..326ca1ddf1d7 100644
>>>>>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>>>>>> @@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>>>>>>   		js->queue[j].fence_context = dma_fence_context_alloc(1);
>>>>>>>   		ret = drm_sched_init(&js->queue[j].sched,
>>>>>>> -				     &panfrost_sched_ops,
>>>>>>> +				     &panfrost_sched_ops, NULL,
>>>>>>>   				     nentries, 0,
>>>>>>>   				     msecs_to_jiffies(JOB_TIMEOUT_MS),
>>>>>>>   				     pfdev->reset.wq,
>>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> index e4fa62abca41..ee6281942e36 100644
>>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>>> @@ -48,7 +48,6 @@
>>>>>>>    * through the jobs entity pointer.
>>>>>>>    */
>>>>>>> -#include <linux/kthread.h>
>>>>>>>   #include <linux/wait.h>
>>>>>>>   #include <linux/sched.h>
>>>>>>>   #include <linux/completion.h>
>>>>>>> @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>>>>>   	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
>>>>>>>   }
>>>>>>> +/**
>>>>>>> + * drm_sched_submit_queue - scheduler queue submission
>>>>>>
>>>>>> There is no verb in the description, and is not clear what
>>>>>> this function does unless one reads the code. Given that this
>>>>>> is DOC, this should be clearer here. Something like "queue
>>>>>> scheduler work to be executed" or something to that effect.
>>>>>>
>>>>>
>>>>> Will fix.
>>>>>> Coming back to this from reading the patch below, it was somewhat
>>>>>> unclear what "drm_sched_submit_queue()" does, since when reading
>>>>>> below, "submit" was being read by my mind as an adjective, as opposed
>>>>>> to a verb. Perhaps something like:
>>>>>>
>>>>>> drm_sched_queue_submit(), or
>>>>>> drm_sched_queue_exec(), or
>>>>>> drm_sched_queue_push(), or something to that effect. You pick. :-)
>>>>>>
>>>>>
>>>>> I prefer the name as is. In this patch we have:
>>>>>
>>>>> drm_sched_submit_queue()
>>>>> drm_sched_submit_start)
>>>>> drm_sched_submit_stop()
>>>>> drm_sched_submit_ready()
>>>>>
>>>>> I like all these functions start with 'drm_sched_submit' which allows
>>>>> for easy searching for the functions that touch the DRM scheduler
>>>>> submission state.
>>>>>
>>>>> With a little better doc are you fine with the names as is.
>>>>
>>>> Notice the following scheme in the naming,
>>>>
>>>> drm_sched_submit_queue()
>>>> drm_sched_submit_start)
>>>> drm_sched_submit_stop()
>>>> drm_sched_submit_ready()
>>>> \---+---/ \--+-/ \-+-/
>>>>      |        |     +---> a verb
>>>>      |        +---------> should be a noun (something in the component)
>>>>      +------------------> the kernel/software component
>>>>
>>>> And although "queue" can technically be used as a verb too, I'd rather it be "enqueue",
>>>> like this:
>>>>
>>>> drm_sched_submit_enqueue()
>>>>
>>>> And using "submit" as the noun of the component is a bit cringy,
>>>> since "submit" is really a verb, and it's cringy to make it a "state"
>>>> or an "object" we operate on in the DRM Scheduler. "Submission" is
>>>> a noun, but "submission enqueue/start/stop/ready" doesn't sound
>>>> very well thought out. "Submission" really is what the work-queue
>>>> does.

^^^^^^^^^^^^^^^^^^^ Here ^^^^^^^^^^^^^^^^^^^^^^^^^^

>>>>
>>>> I'd rather it be a real object, like for instance,
>>>>
>>>> drm_sched_wqueue_enqueue()
>>>> drm_sched_wqueue_start)
>>>> drm_sched_wqueue_stop()
>>>> drm_sched_wqueue_ready()
>>>>
>>
>> How about:
>>
>> drm_sched_submission_enqueue()
>> drm_sched_submission_start)
>> drm_sched_submission_stop()
>> drm_sched_submission_ready()
>>
>> Matt
> 
> Ignore this, I read Tvrtko's comment and not Luben's fully.
> 
> I prefer drm_sched_wqueue over drm_sched_submit_queue, as "submit queue" is
> a made-up thing. drm_sched_submission would be my top choice, but if Luben
> is opposed I will go with drm_sched_wqueue in the next rev.

You had me at "opposed."

I think I've explained why up there.

drm_sched_wqueue_[verb]() is clear and clean. We don't need yet another
abstraction, to the abstraction, to the naming.

If we ever implement anything different than work-queue in the future,
we may split the code and decide to keep both, maybe depending on what
a driver would like to use, etc., so it's cleanest to convey what it means.

"drm_sched_submission_[verb]()" is really cringy.

Plus, it's a good reminder to know that it's a work-queue. Keeps driver
writers informed. There is no reason to obfuscate the code.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e366f61c3aed..16f3cfe1574a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2279,7 +2279,7 @@  static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
 			break;
 		}
 
-		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
 				   ring->num_hw_submission, 0,
 				   timeout, adev->reset_domain->wq,
 				   ring->sched_score, ring->name,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 345fec6cb1a4..618a804ddc34 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -134,7 +134,7 @@  int etnaviv_sched_init(struct etnaviv_gpu *gpu)
 {
 	int ret;
 
-	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
 			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
 			     msecs_to_jiffies(500), NULL, NULL,
 			     dev_name(gpu->dev), gpu->dev);
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index ffd91a5ee299..8d858aed0e56 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -488,7 +488,7 @@  int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
 
 	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
 
-	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
+	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1,
 			      lima_job_hang_limit,
 			      msecs_to_jiffies(timeout), NULL,
 			      NULL, name, pipe->ldev->dev);
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 40c0bc35a44c..b8865e61b40f 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -94,7 +94,7 @@  struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 	 /* currently managing hangcheck ourselves: */
 	sched_timeout = MAX_SCHEDULE_TIMEOUT;
 
-	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
+	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
 			num_hw_submissions, 0, sched_timeout,
 			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
 	if (ret) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 88217185e0f3..d458c2227d4f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -429,7 +429,7 @@  int nouveau_sched_init(struct nouveau_drm *drm)
 	if (!drm->sched_wq)
 		return -ENOMEM;
 
-	return drm_sched_init(sched, &nouveau_sched_ops,
+	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
 			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
 			      NULL, NULL, "nouveau_sched", drm->dev->dev);
 }
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 033f5e684707..326ca1ddf1d7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -831,7 +831,7 @@  int panfrost_job_init(struct panfrost_device *pfdev)
 		js->queue[j].fence_context = dma_fence_context_alloc(1);
 
 		ret = drm_sched_init(&js->queue[j].sched,
-				     &panfrost_sched_ops,
+				     &panfrost_sched_ops, NULL,
 				     nentries, 0,
 				     msecs_to_jiffies(JOB_TIMEOUT_MS),
 				     pfdev->reset.wq,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index e4fa62abca41..ee6281942e36 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -48,7 +48,6 @@ 
  * through the jobs entity pointer.
  */
 
-#include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
@@ -256,6 +255,16 @@  drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
 }
 
+/**
+ * drm_sched_submit_queue - scheduler queue submission
+ * @sched: scheduler instance
+ */
+static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_submit);
+}
+
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
@@ -275,7 +284,7 @@  static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	wake_up_interruptible(&sched->wake_up_worker);
+	drm_sched_submit_queue(sched);
 }
 
 /**
@@ -868,7 +877,7 @@  static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
-		wake_up_interruptible(&sched->wake_up_worker);
+		drm_sched_submit_queue(sched);
 }
 
 /**
@@ -978,61 +987,42 @@  drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 }
 EXPORT_SYMBOL(drm_sched_pick_best);
 
-/**
- * drm_sched_blocked - check if the scheduler is blocked
- *
- * @sched: scheduler instance
- *
- * Returns true if blocked, otherwise false.
- */
-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
-{
-	if (kthread_should_park()) {
-		kthread_parkme();
-		return true;
-	}
-
-	return false;
-}
-
 /**
  * drm_sched_main - main scheduler thread
  *
  * @param: scheduler instance
- *
- * Returns 0.
  */
-static int drm_sched_main(void *param)
+static void drm_sched_main(struct work_struct *w)
 {
-	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_submit);
+	struct drm_sched_entity *entity;
+	struct drm_sched_job *cleanup_job;
 	int r;
 
-	sched_set_fifo_low(current);
+	if (READ_ONCE(sched->pause_submit))
+		return;
 
-	while (!kthread_should_stop()) {
-		struct drm_sched_entity *entity = NULL;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-		struct dma_fence *fence;
-		struct drm_sched_job *cleanup_job = NULL;
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	entity = drm_sched_select_entity(sched);
 
-		wait_event_interruptible(sched->wake_up_worker,
-					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
-					 (!drm_sched_blocked(sched) &&
-					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+	if (!entity && !cleanup_job)
+		return;	/* No more work */
 
-		if (cleanup_job)
-			sched->ops->free_job(cleanup_job);
+	if (cleanup_job)
+		sched->ops->free_job(cleanup_job);
 
-		if (!entity)
-			continue;
+	if (entity) {
+		struct dma_fence *fence;
+		struct drm_sched_fence *s_fence;
+		struct drm_sched_job *sched_job;
 
 		sched_job = drm_sched_entity_pop_job(entity);
-
 		if (!sched_job) {
 			complete_all(&entity->entity_idle);
-			continue;
+			if (!cleanup_job)
+				return;	/* No more work */
+			goto again;
 		}
 
 		s_fence = sched_job->s_fence;
@@ -1063,7 +1053,9 @@  static int drm_sched_main(void *param)
 
 		wake_up(&sched->job_scheduled);
 	}
-	return 0;
+
+again:
+	drm_sched_submit_queue(sched);
 }
 
 /**
@@ -1071,6 +1063,8 @@  static int drm_sched_main(void *param)
  *
  * @sched: scheduler instance
  * @ops: backend operations for this scheduler
+ * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
+ *	       allocated and used
  * @hw_submission: number of hw submissions that can be in flight
  * @hang_limit: number of times to allow a job to hang before dropping it
  * @timeout: timeout value in jiffies for the scheduler
@@ -1084,14 +1078,25 @@  static int drm_sched_main(void *param)
  */
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
+		   struct workqueue_struct *submit_wq,
 		   unsigned hw_submission, unsigned hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev)
 {
-	int i, ret;
+	int i;
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
+	if (!submit_wq) {
+		sched->submit_wq = alloc_ordered_workqueue(name, 0);
+		if (!sched->submit_wq)
+			return -ENOMEM;
+
+		sched->alloc_submit_wq = true;
+	} else {
+		sched->submit_wq = submit_wq;
+		sched->alloc_submit_wq = false;
+	}
 	sched->timeout = timeout;
 	sched->timeout_wq = timeout_wq ? : system_wq;
 	sched->hang_limit = hang_limit;
@@ -1100,23 +1105,15 @@  int drm_sched_init(struct drm_gpu_scheduler *sched,
 	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
 		drm_sched_rq_init(sched, &sched->sched_rq[i]);
 
-	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
+	INIT_WORK(&sched->work_submit, drm_sched_main);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
-
-	/* Each scheduler will run on a seperate kernel thread */
-	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
-	if (IS_ERR(sched->thread)) {
-		ret = PTR_ERR(sched->thread);
-		sched->thread = NULL;
-		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
-		return ret;
-	}
+	sched->pause_submit = false;
 
 	sched->ready = true;
 	return 0;
@@ -1135,8 +1132,7 @@  void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	struct drm_sched_entity *s_entity;
 	int i;
 
-	if (sched->thread)
-		kthread_stop(sched->thread);
+	drm_sched_submit_stop(sched);
 
 	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
 		struct drm_sched_rq *rq = &sched->sched_rq[i];
@@ -1159,6 +1155,8 @@  void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	/* Confirm no work left behind accessing device structures */
 	cancel_delayed_work_sync(&sched->work_tdr);
 
+	if (sched->alloc_submit_wq)
+		destroy_workqueue(sched->submit_wq);
 	sched->ready = false;
 }
 EXPORT_SYMBOL(drm_sched_fini);
@@ -1216,7 +1214,7 @@  EXPORT_SYMBOL(drm_sched_increase_karma);
  */
 bool drm_sched_submit_ready(struct drm_gpu_scheduler *sched)
 {
-	return !!sched->thread;
+	return sched->ready;
 
 }
 EXPORT_SYMBOL(drm_sched_submit_ready);
@@ -1228,7 +1226,8 @@  EXPORT_SYMBOL(drm_sched_submit_ready);
  */
 void drm_sched_submit_stop(struct drm_gpu_scheduler *sched)
 {
-	kthread_park(sched->thread);
+	WRITE_ONCE(sched->pause_submit, true);
+	cancel_work_sync(&sched->work_submit);
 }
 EXPORT_SYMBOL(drm_sched_submit_stop);
 
@@ -1239,6 +1238,7 @@  EXPORT_SYMBOL(drm_sched_submit_stop);
  */
 void drm_sched_submit_start(struct drm_gpu_scheduler *sched)
 {
-	kthread_unpark(sched->thread);
+	WRITE_ONCE(sched->pause_submit, false);
+	queue_work(sched->submit_wq, &sched->work_submit);
 }
 EXPORT_SYMBOL(drm_sched_submit_start);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 06238e6d7f5c..38e092ea41e6 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -388,7 +388,7 @@  v3d_sched_init(struct v3d_dev *v3d)
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_bin_sched_ops,
+			     &v3d_bin_sched_ops, NULL,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
 			     NULL, "v3d_bin", v3d->drm.dev);
@@ -396,7 +396,7 @@  v3d_sched_init(struct v3d_dev *v3d)
 		return ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_render_sched_ops,
+			     &v3d_render_sched_ops, NULL,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
 			     NULL, "v3d_render", v3d->drm.dev);
@@ -404,7 +404,7 @@  v3d_sched_init(struct v3d_dev *v3d)
 		goto fail;
 
 	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
-			     &v3d_tfu_sched_ops,
+			     &v3d_tfu_sched_ops, NULL,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms), NULL,
 			     NULL, "v3d_tfu", v3d->drm.dev);
@@ -413,7 +413,7 @@  v3d_sched_init(struct v3d_dev *v3d)
 
 	if (v3d_has_csd(v3d)) {
 		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
-				     &v3d_csd_sched_ops,
+				     &v3d_csd_sched_ops, NULL,
 				     hw_jobs_limit, job_hang_limit,
 				     msecs_to_jiffies(hang_limit_ms), NULL,
 				     NULL, "v3d_csd", v3d->drm.dev);
@@ -421,7 +421,7 @@  v3d_sched_init(struct v3d_dev *v3d)
 			goto fail;
 
 		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
-				     &v3d_cache_clean_sched_ops,
+				     &v3d_cache_clean_sched_ops, NULL,
 				     hw_jobs_limit, job_hang_limit,
 				     msecs_to_jiffies(hang_limit_ms), NULL,
 				     NULL, "v3d_cache_clean", v3d->drm.dev);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index f12c5aea5294..95927c52383c 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -473,17 +473,16 @@  struct drm_sched_backend_ops {
  * @timeout: the time after which a job is removed from the scheduler.
  * @name: name of the ring for which this scheduler is being used.
  * @sched_rq: priority wise array of run queues.
- * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
- *                  is ready to be scheduled.
  * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
  *                 waits on this wait queue until all the scheduled jobs are
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @submit_wq: workqueue used to queue @work_submit
  * @timeout_wq: workqueue used to queue @work_tdr
+ * @work_submit: schedules jobs and cleans up entities
  * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
- * @thread: the kthread on which the scheduler which run.
  * @pending_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the pending_list.
  * @hang_limit: once the hangs by a job crosses this limit then it is marked
@@ -492,6 +491,8 @@  struct drm_sched_backend_ops {
  * @_score: score used when the driver doesn't provide one
  * @ready: marks if the underlying HW is ready to work
  * @free_guilty: A hit to time out handler to free the guilty job.
+ * @pause_submit: pause queuing of @work_submit on @submit_wq
+ * @alloc_submit_wq: scheduler own allocation of @submit_wq
  * @dev: system &struct device
  *
  * One scheduler is implemented for each hardware ring.
@@ -502,13 +503,13 @@  struct drm_gpu_scheduler {
 	long				timeout;
 	const char			*name;
 	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_COUNT];
-	wait_queue_head_t		wake_up_worker;
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
+	struct workqueue_struct		*submit_wq;
 	struct workqueue_struct		*timeout_wq;
+	struct work_struct		work_submit;
 	struct delayed_work		work_tdr;
-	struct task_struct		*thread;
 	struct list_head		pending_list;
 	spinlock_t			job_list_lock;
 	int				hang_limit;
@@ -516,11 +517,14 @@  struct drm_gpu_scheduler {
 	atomic_t                        _score;
 	bool				ready;
 	bool				free_guilty;
+	bool				pause_submit;
+	bool				alloc_submit_wq;
 	struct device			*dev;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
+		   struct workqueue_struct *submit_wq,
 		   uint32_t hw_submission, unsigned hang_limit,
 		   long timeout, struct workqueue_struct *timeout_wq,
 		   atomic_t *score, const char *name, struct device *dev);