diff mbox

drm/amdgpu: fix race condition in amd_sched_entity_push_job

Message ID 1449062310-2879-1-git-send-email-nhaehnle@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nicolai Hähnle Dec. 2, 2015, 1:18 p.m. UTC
From: Nicolai Hähnle <nicolai.haehnle@amd.com>

As soon as we leave the spinlock after the job has been added to the job
queue, we can no longer rely on the job's data to be available.

I have seen a null-pointer dereference due to sched == NULL in
amd_sched_wakeup via amd_sched_entity_push_job and
amd_sched_ib_submit_kernel_helper. Since the latter initializes
sched_job->sched with the address of the ring scheduler, which is
guaranteed to be non-NULL, this race appears to be a likely culprit.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Bugzilla: https://bugs.freedesktop.org/attachment.cgi?bugid=93079
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Alex Deucher Dec. 2, 2015, 8:04 p.m. UTC | #1
On Wed, Dec 2, 2015 at 8:18 AM, Nicolai Hähnle <nhaehnle@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle@amd.com>
>
> As soon as we leave the spinlock after the job has been added to the job
> queue, we can no longer rely on the job's data to be available.
>
> I have seen a null-pointer dereference due to sched == NULL in
> amd_sched_wakeup via amd_sched_entity_push_job and
> amd_sched_ib_submit_kernel_helper. Since the latter initializes
> sched_job->sched with the address of the ring scheduler, which is
> guaranteed to be non-NULL, this race appears to be a likely culprit.
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
> Bugzilla: https://bugs.freedesktop.org/attachment.cgi?bugid=93079
> Reviewed-by: Christian König <christian.koenig@amd.com>

Applied to my -fixes tree.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index e13b7a0..5ace1a7 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -288,6 +288,7 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)
>   */
>  static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
>  {
> +       struct amd_gpu_scheduler *sched = sched_job->sched;
>         struct amd_sched_entity *entity = sched_job->s_entity;
>         bool added, first = false;
>
> @@ -302,7 +303,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
>
>         /* first job wakes up scheduler */
>         if (first)
> -               amd_sched_wakeup(sched_job->sched);
> +               amd_sched_wakeup(sched);
>
>         return added;
>  }
> @@ -318,9 +319,9 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
>  {
>         struct amd_sched_entity *entity = sched_job->s_entity;
>
> +       trace_amd_sched_job(sched_job);
>         wait_event(entity->sched->job_scheduled,
>                    amd_sched_entity_in(sched_job));
> -       trace_amd_sched_job(sched_job);
>  }
>
>  /**
> --
> 2.5.0
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox

Patch

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index e13b7a0..5ace1a7 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -288,6 +288,7 @@  amd_sched_entity_pop_job(struct amd_sched_entity *entity)
  */
 static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 {
+	struct amd_gpu_scheduler *sched = sched_job->sched;
 	struct amd_sched_entity *entity = sched_job->s_entity;
 	bool added, first = false;
 
@@ -302,7 +303,7 @@  static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 
 	/* first job wakes up scheduler */
 	if (first)
-		amd_sched_wakeup(sched_job->sched);
+		amd_sched_wakeup(sched);
 
 	return added;
 }
@@ -318,9 +319,9 @@  void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
 	struct amd_sched_entity *entity = sched_job->s_entity;
 
+	trace_amd_sched_job(sched_job);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
-	trace_amd_sched_job(sched_job);
 }
 
 /**