diff mbox series

[v4,09/18] drm/v3d: Use scheduler dependency handling

Message ID 20210712175352.802687-10-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show
Series drm/sched dependency tracking and dma-resv fixes | expand

Commit Message

Daniel Vetter July 12, 2021, 5:53 p.m. UTC
With the prep work out of the way this isn't tricky anymore.

Aside: The chaining of the various jobs is a bit awkward, with the
possibility of failure in bad places. I think with the
drm_sched_job_init/arm split and maybe preloading the
job->dependencies xarray this should be fixable.

Cc: Melissa Wen <melissa.srw@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Cc: Emma Anholt <emma@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_drv.h   |  5 -----
 drivers/gpu/drm/v3d/v3d_gem.c   | 25 ++++++++-----------------
 drivers/gpu/drm/v3d/v3d_sched.c | 29 +----------------------------
 3 files changed, 9 insertions(+), 50 deletions(-)

Comments

Melissa Wen July 14, 2021, 9:37 a.m. UTC | #1
On 07/12, Daniel Vetter wrote:
> With the prep work out of the way this isn't tricky anymore.
> 
> Aside: The chaining of the various jobs is a bit awkward, with the
> possibility of failure in bad places. I think with the
> drm_sched_job_init/arm split and maybe preloading the
> job->dependencies xarray this should be fixable.
> 
> Cc: Melissa Wen <melissa.srw@gmail.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Cc: Emma Anholt <emma@anholt.net>
> ---
>  drivers/gpu/drm/v3d/v3d_drv.h   |  5 -----
>  drivers/gpu/drm/v3d/v3d_gem.c   | 25 ++++++++-----------------
>  drivers/gpu/drm/v3d/v3d_sched.c | 29 +----------------------------
>  3 files changed, 9 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index 1d870261eaac..f80f4ff1f7aa 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -192,11 +192,6 @@ struct v3d_job {
>  	struct drm_gem_object **bo;
>  	u32 bo_count;
>  
> -	/* Array of struct dma_fence * to block on before submitting this job.
> -	 */
> -	struct xarray deps;
> -	unsigned long last_dep;
> -
>  	/* v3d fence to be signaled by IRQ handler when the job is complete. */
>  	struct dma_fence *irq_fence;
>  
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 5eccd3658938..42b07ffbea5e 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -257,8 +257,8 @@ v3d_lock_bo_reservations(struct v3d_job *job,
>  		return ret;
>  
>  	for (i = 0; i < job->bo_count; i++) {
> -		ret = drm_gem_fence_array_add_implicit(&job->deps,
> -						       job->bo[i], true);
> +		ret = drm_sched_job_await_implicit(&job->base,
> +						   job->bo[i], true);
>  		if (ret) {
>  			drm_gem_unlock_reservations(job->bo, job->bo_count,
>  						    acquire_ctx);
> @@ -354,8 +354,6 @@ static void
>  v3d_job_free(struct kref *ref)
>  {
>  	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
> -	unsigned long index;
> -	struct dma_fence *fence;
>  	int i;
>  
>  	for (i = 0; i < job->bo_count; i++) {
> @@ -364,11 +362,6 @@ v3d_job_free(struct kref *ref)
>  	}
>  	kvfree(job->bo);
>  
> -	xa_for_each(&job->deps, index, fence) {
> -		dma_fence_put(fence);
> -	}
> -	xa_destroy(&job->deps);
> -
>  	dma_fence_put(job->irq_fence);
>  	dma_fence_put(job->done_fence);
>  
> @@ -452,7 +445,6 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>  	if (ret < 0)
>  		return ret;
>  
> -	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
>  	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
>  				 v3d_priv);
>  	if (ret)
> @@ -462,7 +454,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>  	if (ret == -EINVAL)
>  		goto fail_job;
>  
> -	ret = drm_gem_fence_array_add(&job->deps, in_fence);
> +	ret = drm_sched_job_await_fence(&job->base, in_fence);
>  	if (ret)
>  		goto fail_job;
>  
> @@ -472,7 +464,6 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>  fail_job:
>  	drm_sched_job_cleanup(&job->base);
>  fail:
> -	xa_destroy(&job->deps);
>  	pm_runtime_put_autosuspend(v3d->drm.dev);
>  	return ret;
>  }
> @@ -619,8 +610,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	if (bin) {
>  		v3d_push_job(&bin->base);
>  
> -		ret = drm_gem_fence_array_add(&render->base.deps,
> -					      dma_fence_get(bin->base.done_fence));
> +		ret = drm_sched_job_await_fence(&render->base.base,
> +						dma_fence_get(bin->base.done_fence));
>  		if (ret)
>  			goto fail_unreserve;
>  	}
> @@ -630,7 +621,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	if (clean_job) {
>  		struct dma_fence *render_fence =
>  			dma_fence_get(render->base.done_fence);
> -		ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
> +		ret = drm_sched_job_await_fence(&clean_job->base, render_fence);
>  		if (ret)
>  			goto fail_unreserve;
>  		v3d_push_job(clean_job);
> @@ -820,8 +811,8 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
>  	mutex_lock(&v3d->sched_lock);
>  	v3d_push_job(&job->base);
>  
> -	ret = drm_gem_fence_array_add(&clean_job->deps,
> -				      dma_fence_get(job->base.done_fence));
> +	ret = drm_sched_job_await_fence(&clean_job->base,
> +					dma_fence_get(job->base.done_fence));
>  	if (ret)
>  		goto fail_unreserve;
>  
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 3f352d73af9c..f0de584f452c 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -13,7 +13,7 @@
>   * jobs when bulk background jobs are queued up, we submit a new job
>   * to the HW only when it has completed the last one, instead of
>   * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
> - * v3d_job_dependency() to manage the dependency between bin and
> + * drm_sched_job_await_fence() to manage the dependency between bin and
>   * render, instead of having the clients submit jobs using the HW's
>   * semaphores to interlock between them.
>   */
> @@ -62,28 +62,6 @@ v3d_sched_job_free(struct drm_sched_job *sched_job)
>  	v3d_job_cleanup(job);
>  }
>  
> -/*
> - * Returns the fences that the job depends on, one by one.
> - *
> - * If placed in the scheduler's .dependency method, the corresponding
> - * .run_job won't be called until all of them have been signaled.
> - */
> -static struct dma_fence *
> -v3d_job_dependency(struct drm_sched_job *sched_job,
> -		   struct drm_sched_entity *s_entity)
> -{
> -	struct v3d_job *job = to_v3d_job(sched_job);
> -
> -	/* XXX: Wait on a fence for switching the GMP if necessary,
> -	 * and then do so.
> -	 */
> -
> -	if (!xa_empty(&job->deps))
> -		return xa_erase(&job->deps, job->last_dep++);
> -
> -	return NULL;
> -}
> -
>  static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
>  {
>  	struct v3d_bin_job *job = to_bin_job(sched_job);
> @@ -356,35 +334,30 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
>  }
>  
>  static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
> -	.dependency = v3d_job_dependency,
>  	.run_job = v3d_bin_job_run,
>  	.timedout_job = v3d_bin_job_timedout,
>  	.free_job = v3d_sched_job_free,
>  };
>  
>  static const struct drm_sched_backend_ops v3d_render_sched_ops = {
> -	.dependency = v3d_job_dependency,
>  	.run_job = v3d_render_job_run,
>  	.timedout_job = v3d_render_job_timedout,
>  	.free_job = v3d_sched_job_free,
>  };
>  
>  static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
> -	.dependency = v3d_job_dependency,
>  	.run_job = v3d_tfu_job_run,
>  	.timedout_job = v3d_generic_job_timedout,
>  	.free_job = v3d_sched_job_free,
>  };
>  
>  static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
> -	.dependency = v3d_job_dependency,
>  	.run_job = v3d_csd_job_run,
>  	.timedout_job = v3d_csd_job_timedout,
>  	.free_job = v3d_sched_job_free
>  };
>  
>  static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
> -	.dependency = v3d_job_dependency,
>  	.run_job = v3d_cache_clean_job_run,
>  	.timedout_job = v3d_generic_job_timedout,
>  	.free_job = v3d_sched_job_free
Also here.

Reviewed-by: Melissa Wen <mwen@igalia.com>
> -- 
> 2.32.0
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 1d870261eaac..f80f4ff1f7aa 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -192,11 +192,6 @@  struct v3d_job {
 	struct drm_gem_object **bo;
 	u32 bo_count;
 
-	/* Array of struct dma_fence * to block on before submitting this job.
-	 */
-	struct xarray deps;
-	unsigned long last_dep;
-
 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
 	struct dma_fence *irq_fence;
 
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5eccd3658938..42b07ffbea5e 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -257,8 +257,8 @@  v3d_lock_bo_reservations(struct v3d_job *job,
 		return ret;
 
 	for (i = 0; i < job->bo_count; i++) {
-		ret = drm_gem_fence_array_add_implicit(&job->deps,
-						       job->bo[i], true);
+		ret = drm_sched_job_await_implicit(&job->base,
+						   job->bo[i], true);
 		if (ret) {
 			drm_gem_unlock_reservations(job->bo, job->bo_count,
 						    acquire_ctx);
@@ -354,8 +354,6 @@  static void
 v3d_job_free(struct kref *ref)
 {
 	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
-	unsigned long index;
-	struct dma_fence *fence;
 	int i;
 
 	for (i = 0; i < job->bo_count; i++) {
@@ -364,11 +362,6 @@  v3d_job_free(struct kref *ref)
 	}
 	kvfree(job->bo);
 
-	xa_for_each(&job->deps, index, fence) {
-		dma_fence_put(fence);
-	}
-	xa_destroy(&job->deps);
-
 	dma_fence_put(job->irq_fence);
 	dma_fence_put(job->done_fence);
 
@@ -452,7 +445,6 @@  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	if (ret < 0)
 		return ret;
 
-	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
 	if (ret)
@@ -462,7 +454,7 @@  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	if (ret == -EINVAL)
 		goto fail_job;
 
-	ret = drm_gem_fence_array_add(&job->deps, in_fence);
+	ret = drm_sched_job_await_fence(&job->base, in_fence);
 	if (ret)
 		goto fail_job;
 
@@ -472,7 +464,6 @@  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 fail_job:
 	drm_sched_job_cleanup(&job->base);
 fail:
-	xa_destroy(&job->deps);
 	pm_runtime_put_autosuspend(v3d->drm.dev);
 	return ret;
 }
@@ -619,8 +610,8 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (bin) {
 		v3d_push_job(&bin->base);
 
-		ret = drm_gem_fence_array_add(&render->base.deps,
-					      dma_fence_get(bin->base.done_fence));
+		ret = drm_sched_job_await_fence(&render->base.base,
+						dma_fence_get(bin->base.done_fence));
 		if (ret)
 			goto fail_unreserve;
 	}
@@ -630,7 +621,7 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (clean_job) {
 		struct dma_fence *render_fence =
 			dma_fence_get(render->base.done_fence);
-		ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
+		ret = drm_sched_job_await_fence(&clean_job->base, render_fence);
 		if (ret)
 			goto fail_unreserve;
 		v3d_push_job(clean_job);
@@ -820,8 +811,8 @@  v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&v3d->sched_lock);
 	v3d_push_job(&job->base);
 
-	ret = drm_gem_fence_array_add(&clean_job->deps,
-				      dma_fence_get(job->base.done_fence));
+	ret = drm_sched_job_await_fence(&clean_job->base,
+					dma_fence_get(job->base.done_fence));
 	if (ret)
 		goto fail_unreserve;
 
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 3f352d73af9c..f0de584f452c 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -13,7 +13,7 @@ 
  * jobs when bulk background jobs are queued up, we submit a new job
  * to the HW only when it has completed the last one, instead of
  * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
- * v3d_job_dependency() to manage the dependency between bin and
+ * drm_sched_job_await_fence() to manage the dependency between bin and
  * render, instead of having the clients submit jobs using the HW's
  * semaphores to interlock between them.
  */
@@ -62,28 +62,6 @@  v3d_sched_job_free(struct drm_sched_job *sched_job)
 	v3d_job_cleanup(job);
 }
 
-/*
- * Returns the fences that the job depends on, one by one.
- *
- * If placed in the scheduler's .dependency method, the corresponding
- * .run_job won't be called until all of them have been signaled.
- */
-static struct dma_fence *
-v3d_job_dependency(struct drm_sched_job *sched_job,
-		   struct drm_sched_entity *s_entity)
-{
-	struct v3d_job *job = to_v3d_job(sched_job);
-
-	/* XXX: Wait on a fence for switching the GMP if necessary,
-	 * and then do so.
-	 */
-
-	if (!xa_empty(&job->deps))
-		return xa_erase(&job->deps, job->last_dep++);
-
-	return NULL;
-}
-
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
@@ -356,35 +334,30 @@  v3d_csd_job_timedout(struct drm_sched_job *sched_job)
 }
 
 static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
-	.dependency = v3d_job_dependency,
 	.run_job = v3d_bin_job_run,
 	.timedout_job = v3d_bin_job_timedout,
 	.free_job = v3d_sched_job_free,
 };
 
 static const struct drm_sched_backend_ops v3d_render_sched_ops = {
-	.dependency = v3d_job_dependency,
 	.run_job = v3d_render_job_run,
 	.timedout_job = v3d_render_job_timedout,
 	.free_job = v3d_sched_job_free,
 };
 
 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
-	.dependency = v3d_job_dependency,
 	.run_job = v3d_tfu_job_run,
 	.timedout_job = v3d_generic_job_timedout,
 	.free_job = v3d_sched_job_free,
 };
 
 static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
-	.dependency = v3d_job_dependency,
 	.run_job = v3d_csd_job_run,
 	.timedout_job = v3d_csd_job_timedout,
 	.free_job = v3d_sched_job_free
 };
 
 static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
-	.dependency = v3d_job_dependency,
 	.run_job = v3d_cache_clean_job_run,
 	.timedout_job = v3d_generic_job_timedout,
 	.free_job = v3d_sched_job_free