diff mbox series

drm/etnaviv: don't block scheduler when GPU is still active

Message ID 20240621195919.491217-1-l.stach@pengutronix.de (mailing list archive)
State New, archived
Headers show
Series drm/etnaviv: don't block scheduler when GPU is still active | expand

Commit Message

Lucas Stach June 21, 2024, 7:59 p.m. UTC
Since 45ecaea73883 ("drm/sched: Partial revert of 'drm/sched: Keep
s_fence->parent pointer'"), still-active jobs aren't put back on the
pending list by drm_sched_start(), as they don't have an active
parent fence anymore. As a result, if the GPU is still working and the
timeout is extended, all currently active jobs will be freed.

To avoid prematurely freeing jobs that are still active on the GPU,
don't block the scheduler until we are fully committed to actually
resetting the GPU.

As the current job is already removed from the pending list and
will not be put back when drm_sched_start() isn't called, we must
make sure to put the job back on the pending list when extending
the timeout.

Cc: stable@vger.kernel.org #6.0
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_sched.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Philipp Zabel June 24, 2024, 12:12 p.m. UTC | #1
On Fr, 2024-06-21 at 21:59 +0200, Lucas Stach wrote:
> Since 45ecaea73883 ("drm/sched: Partial revert of 'drm/sched: Keep
> s_fence->parent pointer'") still active jobs aren't put back in the
> pending list on drm_sched_start(), as they don't have a active
> parent fence anymore, so if the GPU is still working and the timeout
> is extended, all currently active jobs will be freed.
> 
> To avoid prematurely freeing jobs that are still active on the GPU,
> don't block the scheduler until we are fully committed to actually
> reset the GPU.
> 
> As the current job is already removed from the pending list and
> will not be put back when drm_sched_start() isn't called, we must
> make sure to put the job back on the pending list when extending
> the timeout.
> 
> Cc: stable@vger.kernel.org #6.0
> Signed-off-by: Lucas Stach <l.stach@pengutronix.de>

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>

regards
Philipp
Christian Gmeiner June 24, 2024, 1:18 p.m. UTC | #2
>
> Since 45ecaea73883 ("drm/sched: Partial revert of 'drm/sched: Keep
> s_fence->parent pointer'") still active jobs aren't put back in the
> pending list on drm_sched_start(), as they don't have a active
> parent fence anymore, so if the GPU is still working and the timeout
> is extended, all currently active jobs will be freed.
>
> To avoid prematurely freeing jobs that are still active on the GPU,
> don't block the scheduler until we are fully committed to actually
> reset the GPU.
>
> As the current job is already removed from the pending list and
> will not be put back when drm_sched_start() isn't called, we must
> make sure to put the job back on the pending list when extending
> the timeout.
>
> Cc: stable@vger.kernel.org #6.0
> Signed-off-by: Lucas Stach <l.stach@pengutronix.de>

Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com>

> ---
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index c4b04b0dee16..62dcfdc7894d 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -38,9 +38,6 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
>         u32 dma_addr;
>         int change;
>
> -       /* block scheduler */
> -       drm_sched_stop(&gpu->sched, sched_job);
> -
>         /*
>          * If the GPU managed to complete this jobs fence, the timout is
>          * spurious. Bail out.
> @@ -63,6 +60,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
>                 goto out_no_timeout;
>         }
>
> +       /* block scheduler */
> +       drm_sched_stop(&gpu->sched, sched_job);
> +
>         if(sched_job)
>                 drm_sched_increase_karma(sched_job);
>
> @@ -76,8 +76,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
>         return DRM_GPU_SCHED_STAT_NOMINAL;
>
>  out_no_timeout:
> -       /* restart scheduler after GPU is usable again */
> -       drm_sched_start(&gpu->sched, true);
> +       list_add(&sched_job->list, &sched_job->sched->pending_list);
>         return DRM_GPU_SCHED_STAT_NOMINAL;
>  }
>
> --
> 2.39.2
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index c4b04b0dee16..62dcfdc7894d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -38,9 +38,6 @@  static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 	u32 dma_addr;
 	int change;
 
-	/* block scheduler */
-	drm_sched_stop(&gpu->sched, sched_job);
-
 	/*
 	 * If the GPU managed to complete this jobs fence, the timout is
 	 * spurious. Bail out.
@@ -63,6 +60,9 @@  static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 		goto out_no_timeout;
 	}
 
+	/* block scheduler */
+	drm_sched_stop(&gpu->sched, sched_job);
+
 	if(sched_job)
 		drm_sched_increase_karma(sched_job);
 
@@ -76,8 +76,7 @@  static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 
 out_no_timeout:
-	/* restart scheduler after GPU is usable again */
-	drm_sched_start(&gpu->sched, true);
+	list_add(&sched_job->list, &sched_job->sched->pending_list);
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }