diff mbox series

drm/msm/gem: Unpin objects slightly later

Message ID 20220923224043.2449152-1-robdclark@gmail.com (mailing list archive)
State Not Applicable
Headers show
Series drm/msm/gem: Unpin objects slightly later | expand

Commit Message

Rob Clark Sept. 23, 2022, 10:40 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

The introduction of 025d27239a2f exposes a problem with f371bcc0c2ac, in
that we need to keep the object pinned in the time the submit is queued
up in the gpu scheduler.  Otherwise the shrinker will see it as a thing
that can be evicted if we wait for it to be signaled.  But if the
shrinker path is waiting on it with the obj lock held, the job cannot be
scheduled, as that also requires briefly grabbing the obj lock, leading
to deadlock.  (Not to mention, we don't want the shrinker to evict an
obj queued up in gpu scheduler.)

Fixes: f371bcc0c2ac ("drm/msm/gem: Unpin buffers earlier")
Fixes: 025d27239a2f ("drm/msm/gem: Evict active GEM objects when necessary")
Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/19
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++--
 drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

Comments

Chia-I Wu Sept. 29, 2022, 8:04 p.m. UTC | #1
On Fri, Sep 23, 2022 at 3:41 PM Rob Clark <robdclark@gmail.com> wrote:
>
> From: Rob Clark <robdclark@chromium.org>
>
> The introduction of 025d27239a2f exposes a problem with f371bcc0c2ac, in
> that we need to keep the object pinned in the time the submit is queued
> up in the gpu scheduler.  Otherwise the shrinker will see it as a thing
> that can be evicted if we wait for it to be signaled.  But if the
> shrinker path is waiting on it with the obj lock held, the job cannot be
> scheduled, as that also requires briefly grabbing the obj lock, leading
> to deadlock.  (Not to mention, we don't want the shrinker to evict an
> obj queued up in gpu scheduler.)
>
> Fixes: f371bcc0c2ac ("drm/msm/gem: Unpin buffers earlier")
> Fixes: 025d27239a2f ("drm/msm/gem: Evict active GEM objects when necessary")
> Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/19
> Signed-off-by: Rob Clark <robdclark@chromium.org>
Tested-by: Chia-I Wu <olvaffe@gmail.com>
> ---
>  drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++--
>  drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++-
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 5599d93ec0d2..c670591995e6 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -501,11 +501,11 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
>   */
>  static void submit_cleanup(struct msm_gem_submit *submit, bool error)
>  {
> -       unsigned cleanup_flags = BO_LOCKED | BO_OBJ_PINNED;
> +       unsigned cleanup_flags = BO_LOCKED;
>         unsigned i;
>
>         if (error)
> -               cleanup_flags |= BO_VMA_PINNED;
> +               cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
>
>         for (i = 0; i < submit->nr_bos; i++) {
>                 struct msm_gem_object *msm_obj = submit->bos[i].obj;
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> index cad4c3525f0b..57a8e9564540 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> @@ -25,7 +25,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
>
>                 msm_gem_lock(obj);
>                 msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx);
> -               submit->bos[i].flags &= ~BO_VMA_PINNED;
> +               msm_gem_unpin_locked(obj);
> +               submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
>                 msm_gem_unlock(obj);
>         }
>
> --
> 2.37.2
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 5599d93ec0d2..c670591995e6 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -501,11 +501,11 @@  static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
  */
 static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 {
-	unsigned cleanup_flags = BO_LOCKED | BO_OBJ_PINNED;
+	unsigned cleanup_flags = BO_LOCKED;
 	unsigned i;
 
 	if (error)
-		cleanup_flags |= BO_VMA_PINNED;
+		cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index cad4c3525f0b..57a8e9564540 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -25,7 +25,8 @@  static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 
 		msm_gem_lock(obj);
 		msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx);
-		submit->bos[i].flags &= ~BO_VMA_PINNED;
+		msm_gem_unpin_locked(obj);
+		submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
 		msm_gem_unlock(obj);
 	}