diff mbox series

drm/ttm: remove ttm_bo_wait_unreserved

Message ID 20190822064921.1189-1-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show
Series drm/ttm: remove ttm_bo_wait_unreserved | expand

Commit Message

Daniel Vetter Aug. 22, 2019, 6:49 a.m. UTC
With nouveau fixed all ttm-using drives have the correct nesting of
mmap_sem vs dma_resv, and we can just lock the buffer.

Assuming I didn't screw up anything with my audit of course.

v2:
- Dont forget wu_mutex (Christian König)
- Keep the mmap_sem-less wait optimization (Thomas)
- Use _lock_interruptible to be good citizens (Thomas)

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
 drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
 drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
 include/drm/ttm/ttm_bo_api.h      |  4 ----
 4 files changed, 5 insertions(+), 54 deletions(-)

Comments

Christian König Aug. 22, 2019, 7:56 a.m. UTC | #1
Am 22.08.19 um 08:49 schrieb Daniel Vetter:
> With nouveau fixed all ttm-using drives have the correct nesting of
> mmap_sem vs dma_resv, and we can just lock the buffer.
>
> Assuming I didn't screw up anything with my audit of course.
>
> v2:
> - Dont forget wu_mutex (Christian König)
> - Keep the mmap_sem-less wait optimization (Thomas)
> - Use _lock_interruptible to be good citizens (Thomas)
>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Huang Rui <ray.huang@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
>   drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
>   drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
>   include/drm/ttm/ttm_bo_api.h      |  4 ----
>   4 files changed, 5 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 20ff56f27aa4..d1ce5d315d5b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
>   	dma_fence_put(bo->moving);
>   	if (!ttm_bo_uses_embedded_gem_object(bo))
>   		dma_resv_fini(&bo->base._resv);
> -	mutex_destroy(&bo->wu_mutex);
>   	bo->destroy(bo);
>   	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
>   }
> @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>   	INIT_LIST_HEAD(&bo->ddestroy);
>   	INIT_LIST_HEAD(&bo->swap);
>   	INIT_LIST_HEAD(&bo->io_reserve_lru);
> -	mutex_init(&bo->wu_mutex);
>   	bo->bdev = bdev;
>   	bo->type = type;
>   	bo->num_pages = num_pages;
> @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>   		;
>   }
>   EXPORT_SYMBOL(ttm_bo_swapout_all);
> -
> -/**
> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> - * unreserved
> - *
> - * @bo: Pointer to buffer
> - */
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> -{
> -	int ret;
> -
> -	/*
> -	 * In the absense of a wait_unlocked API,
> -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
> -	 * concurrent use of this function. Note that this use of
> -	 * bo::wu_mutex can go away if we change locking order to
> -	 * mmap_sem -> bo::reserve.
> -	 */
> -	ret = mutex_lock_interruptible(&bo->wu_mutex);
> -	if (unlikely(ret != 0))
> -		return -ERESTARTSYS;
> -	if (!dma_resv_is_locked(bo->base.resv))
> -		goto out_unlock;
> -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> -	if (ret == -EINTR)
> -		ret = -ERESTARTSYS;
> -	if (unlikely(ret != 0))
> -		goto out_unlock;
> -	dma_resv_unlock(bo->base.resv);
> -
> -out_unlock:
> -	mutex_unlock(&bo->wu_mutex);
> -	return ret;
> -}
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index fe81c565e7ef..82ea26a49959 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>   	INIT_LIST_HEAD(&fbo->base.lru);
>   	INIT_LIST_HEAD(&fbo->base.swap);
>   	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
> -	mutex_init(&fbo->base.wu_mutex);
>   	fbo->base.moving = NULL;
>   	drm_vma_node_reset(&fbo->base.base.vma_node);
>   	atomic_set(&fbo->base.cpu_writers, 0);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 76eedb963693..a61a35e57d1c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		&bdev->man[bo->mem.mem_type];
>   	struct vm_area_struct cvma;
>   
> -	/*
> -	 * Work around locking order reversal in fault / nopfn
> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
> -	 * for reserve, and if it fails, retry the fault after waiting
> -	 * for the buffer to become unreserved.
> -	 */
>   	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>   		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>   			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {

Not an expert on fault handling, but shouldn't this now be one if?

E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE 
instead of VM_FAULT_RETRY.

But really take that with a grain of salt,
Christian.

>   				ttm_bo_get(bo);
>   				up_read(&vmf->vma->vm_mm->mmap_sem);
> -				(void) ttm_bo_wait_unreserved(bo);
> +				if (!dma_resv_lock_interruptible(bo->base.resv,
> +								 NULL))
> +					dma_resv_unlock(bo->base.resv);
>   				ttm_bo_put(bo);
>   			}
>   
>   			return VM_FAULT_RETRY;
>   		}
>   
> -		/*
> -		 * If we'd want to change locking order to
> -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> -		 * instead of retrying the fault...
> -		 */
> -		return VM_FAULT_NOPAGE;
> +		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
> +			return VM_FAULT_NOPAGE;
>   	}
>   
>   	/*
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 43c4929a2171..21c7d0d28757 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -155,7 +155,6 @@ struct ttm_tt;
>    * @offset: The current GPU offset, which can have different meanings
>    * depending on the memory type. For SYSTEM type memory, it should be 0.
>    * @cur_placement: Hint of current placement.
> - * @wu_mutex: Wait unreserved mutex.
>    *
>    * Base class for TTM buffer object, that deals with data placement and CPU
>    * mappings. GPU mappings are really up to the driver, but for simpler GPUs
> @@ -229,8 +228,6 @@ struct ttm_buffer_object {
>   	uint64_t offset; /* GPU address space is independent of CPU word size */
>   
>   	struct sg_table *sg;
> -
> -	struct mutex wu_mutex;
>   };
>   
>   /**
> @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>   int ttm_bo_swapout(struct ttm_bo_global *glob,
>   			struct ttm_operation_ctx *ctx);
>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>   
>   /**
>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
Daniel Vetter Aug. 22, 2019, 8:47 a.m. UTC | #2
On Thu, Aug 22, 2019 at 9:56 AM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
> > With nouveau fixed all ttm-using drives have the correct nesting of
> > mmap_sem vs dma_resv, and we can just lock the buffer.
> >
> > Assuming I didn't screw up anything with my audit of course.
> >
> > v2:
> > - Dont forget wu_mutex (Christian König)
> > - Keep the mmap_sem-less wait optimization (Thomas)
> > - Use _lock_interruptible to be good citizens (Thomas)
> >
> > Reviewed-by: Christian König <christian.koenig@amd.com>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Cc: Huang Rui <ray.huang@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> > Cc: Thomas Hellstrom <thellstrom@vmware.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
> >   drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
> >   include/drm/ttm/ttm_bo_api.h      |  4 ----
> >   4 files changed, 5 insertions(+), 54 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 20ff56f27aa4..d1ce5d315d5b 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
> >       dma_fence_put(bo->moving);
> >       if (!ttm_bo_uses_embedded_gem_object(bo))
> >               dma_resv_fini(&bo->base._resv);
> > -     mutex_destroy(&bo->wu_mutex);
> >       bo->destroy(bo);
> >       ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
> >   }
> > @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
> >       INIT_LIST_HEAD(&bo->ddestroy);
> >       INIT_LIST_HEAD(&bo->swap);
> >       INIT_LIST_HEAD(&bo->io_reserve_lru);
> > -     mutex_init(&bo->wu_mutex);
> >       bo->bdev = bdev;
> >       bo->type = type;
> >       bo->num_pages = num_pages;
> > @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
> >               ;
> >   }
> >   EXPORT_SYMBOL(ttm_bo_swapout_all);
> > -
> > -/**
> > - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> > - * unreserved
> > - *
> > - * @bo: Pointer to buffer
> > - */
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> > -{
> > -     int ret;
> > -
> > -     /*
> > -      * In the absense of a wait_unlocked API,
> > -      * Use the bo::wu_mutex to avoid triggering livelocks due to
> > -      * concurrent use of this function. Note that this use of
> > -      * bo::wu_mutex can go away if we change locking order to
> > -      * mmap_sem -> bo::reserve.
> > -      */
> > -     ret = mutex_lock_interruptible(&bo->wu_mutex);
> > -     if (unlikely(ret != 0))
> > -             return -ERESTARTSYS;
> > -     if (!dma_resv_is_locked(bo->base.resv))
> > -             goto out_unlock;
> > -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> > -     if (ret == -EINTR)
> > -             ret = -ERESTARTSYS;
> > -     if (unlikely(ret != 0))
> > -             goto out_unlock;
> > -     dma_resv_unlock(bo->base.resv);
> > -
> > -out_unlock:
> > -     mutex_unlock(&bo->wu_mutex);
> > -     return ret;
> > -}
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index fe81c565e7ef..82ea26a49959 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> >       INIT_LIST_HEAD(&fbo->base.lru);
> >       INIT_LIST_HEAD(&fbo->base.swap);
> >       INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
> > -     mutex_init(&fbo->base.wu_mutex);
> >       fbo->base.moving = NULL;
> >       drm_vma_node_reset(&fbo->base.base.vma_node);
> >       atomic_set(&fbo->base.cpu_writers, 0);
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index 76eedb963693..a61a35e57d1c 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> >               &bdev->man[bo->mem.mem_type];
> >       struct vm_area_struct cvma;
> >
> > -     /*
> > -      * Work around locking order reversal in fault / nopfn
> > -      * between mmap_sem and bo_reserve: Perform a trylock operation
> > -      * for reserve, and if it fails, retry the fault after waiting
> > -      * for the buffer to become unreserved.
> > -      */
> >       if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >               if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >                       if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>
> Not an expert on fault handling, but shouldn't this now be one if?
>
> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE
> instead of VM_FAULT_RETRY.

Honestly I have no idea at all about this stuff. I just learned about
the mmap_sem less retry now that Thomas pointed it out, and I have no
idea how anything else here works. My approach has always been to just
throw ridiculous amounts of really nasty tests at fault handling
(including handling our own gtt mmaps to copy*user in relocs or gup
for userptr and all that), and leave it at that :-)

> But really take that with a grain of salt,

No idea either. It should be functionally equivalent to what was there
before, except we now have the full blocking wait for the mutex
instead of busy-spinning on NO_PAGE (with the wait_unreserved mixed in
every odd fault I guess?). All over my head I'd say ...

Cheers, Daniel

> Christian.
>
> >                               ttm_bo_get(bo);
> >                               up_read(&vmf->vma->vm_mm->mmap_sem);
> > -                             (void) ttm_bo_wait_unreserved(bo);
> > +                             if (!dma_resv_lock_interruptible(bo->base.resv,
> > +                                                              NULL))
> > +                                     dma_resv_unlock(bo->base.resv);
> >                               ttm_bo_put(bo);
> >                       }
> >
> >                       return VM_FAULT_RETRY;
> >               }
> >
> > -             /*
> > -              * If we'd want to change locking order to
> > -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> > -              * instead of retrying the fault...
> > -              */
> > -             return VM_FAULT_NOPAGE;
> > +             if (dma_resv_lock_interruptible(bo->base.resv, NULL))
> > +                     return VM_FAULT_NOPAGE;
> >       }
> >
> >       /*
> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > index 43c4929a2171..21c7d0d28757 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -155,7 +155,6 @@ struct ttm_tt;
> >    * @offset: The current GPU offset, which can have different meanings
> >    * depending on the memory type. For SYSTEM type memory, it should be 0.
> >    * @cur_placement: Hint of current placement.
> > - * @wu_mutex: Wait unreserved mutex.
> >    *
> >    * Base class for TTM buffer object, that deals with data placement and CPU
> >    * mappings. GPU mappings are really up to the driver, but for simpler GPUs
> > @@ -229,8 +228,6 @@ struct ttm_buffer_object {
> >       uint64_t offset; /* GPU address space is independent of CPU word size */
> >
> >       struct sg_table *sg;
> > -
> > -     struct mutex wu_mutex;
> >   };
> >
> >   /**
> > @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
> >   int ttm_bo_swapout(struct ttm_bo_global *glob,
> >                       struct ttm_operation_ctx *ctx);
> >   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> >
> >   /**
> >    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Thomas Hellström (Intel) Aug. 22, 2019, 9:53 a.m. UTC | #3
On 8/22/19 10:47 AM, Daniel Vetter wrote:
> On Thu, Aug 22, 2019 at 9:56 AM Koenig, Christian
> <Christian.Koenig@amd.com> wrote:
>> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>
>>> Assuming I didn't screw up anything with my audit of course.
>>>
>>> v2:
>>> - Dont forget wu_mutex (Christian König)
>>> - Keep the mmap_sem-less wait optimization (Thomas)
>>> - Use _lock_interruptible to be good citizens (Thomas)
>>>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Cc: Huang Rui <ray.huang@amd.com>
>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
>>>    drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
>>>    include/drm/ttm/ttm_bo_api.h      |  4 ----
>>>    4 files changed, 5 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 20ff56f27aa4..d1ce5d315d5b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
>>>        dma_fence_put(bo->moving);
>>>        if (!ttm_bo_uses_embedded_gem_object(bo))
>>>                dma_resv_fini(&bo->base._resv);
>>> -     mutex_destroy(&bo->wu_mutex);
>>>        bo->destroy(bo);
>>>        ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
>>>    }
>>> @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>>>        INIT_LIST_HEAD(&bo->ddestroy);
>>>        INIT_LIST_HEAD(&bo->swap);
>>>        INIT_LIST_HEAD(&bo->io_reserve_lru);
>>> -     mutex_init(&bo->wu_mutex);
>>>        bo->bdev = bdev;
>>>        bo->type = type;
>>>        bo->num_pages = num_pages;
>>> @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>                ;
>>>    }
>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
>>> -
>>> -/**
>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>> - * unreserved
>>> - *
>>> - * @bo: Pointer to buffer
>>> - */
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>> -{
>>> -     int ret;
>>> -
>>> -     /*
>>> -      * In the absense of a wait_unlocked API,
>>> -      * Use the bo::wu_mutex to avoid triggering livelocks due to
>>> -      * concurrent use of this function. Note that this use of
>>> -      * bo::wu_mutex can go away if we change locking order to
>>> -      * mmap_sem -> bo::reserve.
>>> -      */
>>> -     ret = mutex_lock_interruptible(&bo->wu_mutex);
>>> -     if (unlikely(ret != 0))
>>> -             return -ERESTARTSYS;
>>> -     if (!dma_resv_is_locked(bo->base.resv))
>>> -             goto out_unlock;
>>> -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>> -     if (ret == -EINTR)
>>> -             ret = -ERESTARTSYS;
>>> -     if (unlikely(ret != 0))
>>> -             goto out_unlock;
>>> -     dma_resv_unlock(bo->base.resv);
>>> -
>>> -out_unlock:
>>> -     mutex_unlock(&bo->wu_mutex);
>>> -     return ret;
>>> -}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> index fe81c565e7ef..82ea26a49959 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>>>        INIT_LIST_HEAD(&fbo->base.lru);
>>>        INIT_LIST_HEAD(&fbo->base.swap);
>>>        INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
>>> -     mutex_init(&fbo->base.wu_mutex);
>>>        fbo->base.moving = NULL;
>>>        drm_vma_node_reset(&fbo->base.base.vma_node);
>>>        atomic_set(&fbo->base.cpu_writers, 0);
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> index 76eedb963693..a61a35e57d1c 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>                &bdev->man[bo->mem.mem_type];
>>>        struct vm_area_struct cvma;
>>>
>>> -     /*
>>> -      * Work around locking order reversal in fault / nopfn
>>> -      * between mmap_sem and bo_reserve: Perform a trylock operation
>>> -      * for reserve, and if it fails, retry the fault after waiting
>>> -      * for the buffer to become unreserved.
>>> -      */
>>>        if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>                if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>                        if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>> Not an expert on fault handling, but shouldn't this now be one if?
>>
>> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE
>> instead of VM_FAULT_RETRY.
> Honestly I have no idea at all about this stuff. I just learned about
> the mmap_sem less retry now that Thomas pointed it out, and I have no
> idea how anything else here works. My approach has always been to just
> throw ridiculous amounts of really nasty tests at fault handling
> (including handling our own gtt mmaps to copy*user in relocs or gup
> for userptr and all that), and leave it at that :-)
>
>> But really take that with a grain of salt,
> No idea either. It should be functionally equivalent to what was there
> before, except we now have the full blocking wait for the mutex
> instead of busy-spinning on NO_PAGE (with the wait_unreserved mixed in
> every odd fault I guess?). All over my head I'd say ...

To be honest I don't remember the difference about VM_FAULT_RETRY with 
!FAULT_FLAG_RETRY_NOWAIT and just returning VM_FAULT_NOPAGE. It appears 
most users and TTM use the former, while shmem uses the latter.

The detailed FAULT_RETRY semantics are pretty undocumented and requires 
diving into the mm system to get the full picture.

BTW it looks to me like vgem and vmks has got VM_FAULT_RETRY wrong, 
since they might return it
without ALLOW_RETRY, and if FAULT_FLAG_RETRY_NOWAIT is true they, should 
drop the mmap_sem,
otherwise things will go really bad.

/Thomas


>
> Cheers, Daniel
>
>> Christian.
>>
>>>                                ttm_bo_get(bo);
>>>                                up_read(&vmf->vma->vm_mm->mmap_sem);
>>> -                             (void) ttm_bo_wait_unreserved(bo);
>>> +                             if (!dma_resv_lock_interruptible(bo->base.resv,
>>> +                                                              NULL))
>>> +                                     dma_resv_unlock(bo->base.resv);
>>>                                ttm_bo_put(bo);
>>>                        }
>>>
>>>                        return VM_FAULT_RETRY;
>>>                }
>>>
>>> -             /*
>>> -              * If we'd want to change locking order to
>>> -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>> -              * instead of retrying the fault...
>>> -              */
>>> -             return VM_FAULT_NOPAGE;
>>> +             if (dma_resv_lock_interruptible(bo->base.resv, NULL))
>>> +                     return VM_FAULT_NOPAGE;
>>>        }
>>>
>>>        /*
>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>> index 43c4929a2171..21c7d0d28757 100644
>>> --- a/include/drm/ttm/ttm_bo_api.h
>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>> @@ -155,7 +155,6 @@ struct ttm_tt;
>>>     * @offset: The current GPU offset, which can have different meanings
>>>     * depending on the memory type. For SYSTEM type memory, it should be 0.
>>>     * @cur_placement: Hint of current placement.
>>> - * @wu_mutex: Wait unreserved mutex.
>>>     *
>>>     * Base class for TTM buffer object, that deals with data placement and CPU
>>>     * mappings. GPU mappings are really up to the driver, but for simpler GPUs
>>> @@ -229,8 +228,6 @@ struct ttm_buffer_object {
>>>        uint64_t offset; /* GPU address space is independent of CPU word size */
>>>
>>>        struct sg_table *sg;
>>> -
>>> -     struct mutex wu_mutex;
>>>    };
>>>
>>>    /**
>>> @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>    int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>                        struct ttm_operation_ctx *ctx);
>>>    void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>
>>>    /**
>>>     * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Daniel Vetter Aug. 22, 2019, 1:06 p.m. UTC | #4
On Thu, Aug 22, 2019 at 07:56:56AM +0000, Koenig, Christian wrote:
> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
> > With nouveau fixed all ttm-using drives have the correct nesting of
> > mmap_sem vs dma_resv, and we can just lock the buffer.
> >
> > Assuming I didn't screw up anything with my audit of course.
> >
> > v2:
> > - Dont forget wu_mutex (Christian König)
> > - Keep the mmap_sem-less wait optimization (Thomas)
> > - Use _lock_interruptible to be good citizens (Thomas)
> >
> > Reviewed-by: Christian König <christian.koenig@amd.com>

btw I realized I didn't remove your r-b, since v1 was broken.

For formality, can you pls reaffirm, or still something broken?

Also from the other thread: Reviewed-by: Thomas Hellström <thellstrom@vmware.com>

Thanks, Daniel

> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Cc: Huang Rui <ray.huang@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> > Cc: Thomas Hellstrom <thellstrom@vmware.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
> >   drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
> >   include/drm/ttm/ttm_bo_api.h      |  4 ----
> >   4 files changed, 5 insertions(+), 54 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 20ff56f27aa4..d1ce5d315d5b 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
> >   	dma_fence_put(bo->moving);
> >   	if (!ttm_bo_uses_embedded_gem_object(bo))
> >   		dma_resv_fini(&bo->base._resv);
> > -	mutex_destroy(&bo->wu_mutex);
> >   	bo->destroy(bo);
> >   	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
> >   }
> > @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
> >   	INIT_LIST_HEAD(&bo->ddestroy);
> >   	INIT_LIST_HEAD(&bo->swap);
> >   	INIT_LIST_HEAD(&bo->io_reserve_lru);
> > -	mutex_init(&bo->wu_mutex);
> >   	bo->bdev = bdev;
> >   	bo->type = type;
> >   	bo->num_pages = num_pages;
> > @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
> >   		;
> >   }
> >   EXPORT_SYMBOL(ttm_bo_swapout_all);
> > -
> > -/**
> > - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> > - * unreserved
> > - *
> > - * @bo: Pointer to buffer
> > - */
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> > -{
> > -	int ret;
> > -
> > -	/*
> > -	 * In the absense of a wait_unlocked API,
> > -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
> > -	 * concurrent use of this function. Note that this use of
> > -	 * bo::wu_mutex can go away if we change locking order to
> > -	 * mmap_sem -> bo::reserve.
> > -	 */
> > -	ret = mutex_lock_interruptible(&bo->wu_mutex);
> > -	if (unlikely(ret != 0))
> > -		return -ERESTARTSYS;
> > -	if (!dma_resv_is_locked(bo->base.resv))
> > -		goto out_unlock;
> > -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> > -	if (ret == -EINTR)
> > -		ret = -ERESTARTSYS;
> > -	if (unlikely(ret != 0))
> > -		goto out_unlock;
> > -	dma_resv_unlock(bo->base.resv);
> > -
> > -out_unlock:
> > -	mutex_unlock(&bo->wu_mutex);
> > -	return ret;
> > -}
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > index fe81c565e7ef..82ea26a49959 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> > @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> >   	INIT_LIST_HEAD(&fbo->base.lru);
> >   	INIT_LIST_HEAD(&fbo->base.swap);
> >   	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
> > -	mutex_init(&fbo->base.wu_mutex);
> >   	fbo->base.moving = NULL;
> >   	drm_vma_node_reset(&fbo->base.base.vma_node);
> >   	atomic_set(&fbo->base.cpu_writers, 0);
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index 76eedb963693..a61a35e57d1c 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> >   		&bdev->man[bo->mem.mem_type];
> >   	struct vm_area_struct cvma;
> >   
> > -	/*
> > -	 * Work around locking order reversal in fault / nopfn
> > -	 * between mmap_sem and bo_reserve: Perform a trylock operation
> > -	 * for reserve, and if it fails, retry the fault after waiting
> > -	 * for the buffer to become unreserved.
> > -	 */
> >   	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >   		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >   			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> 
> Not an expert on fault handling, but shouldn't this now be one if?
> 
> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE 
> instead of VM_FAULT_RETRY.
> 
> But really take that with a grain of salt,
> Christian.
> 
> >   				ttm_bo_get(bo);
> >   				up_read(&vmf->vma->vm_mm->mmap_sem);
> > -				(void) ttm_bo_wait_unreserved(bo);
> > +				if (!dma_resv_lock_interruptible(bo->base.resv,
> > +								 NULL))
> > +					dma_resv_unlock(bo->base.resv);
> >   				ttm_bo_put(bo);
> >   			}
> >   
> >   			return VM_FAULT_RETRY;
> >   		}
> >   
> > -		/*
> > -		 * If we'd want to change locking order to
> > -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> > -		 * instead of retrying the fault...
> > -		 */
> > -		return VM_FAULT_NOPAGE;
> > +		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
> > +			return VM_FAULT_NOPAGE;
> >   	}
> >   
> >   	/*
> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > index 43c4929a2171..21c7d0d28757 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -155,7 +155,6 @@ struct ttm_tt;
> >    * @offset: The current GPU offset, which can have different meanings
> >    * depending on the memory type. For SYSTEM type memory, it should be 0.
> >    * @cur_placement: Hint of current placement.
> > - * @wu_mutex: Wait unreserved mutex.
> >    *
> >    * Base class for TTM buffer object, that deals with data placement and CPU
> >    * mappings. GPU mappings are really up to the driver, but for simpler GPUs
> > @@ -229,8 +228,6 @@ struct ttm_buffer_object {
> >   	uint64_t offset; /* GPU address space is independent of CPU word size */
> >   
> >   	struct sg_table *sg;
> > -
> > -	struct mutex wu_mutex;
> >   };
> >   
> >   /**
> > @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
> >   int ttm_bo_swapout(struct ttm_bo_global *glob,
> >   			struct ttm_operation_ctx *ctx);
> >   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> >   
> >   /**
> >    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Christian König Aug. 22, 2019, 2:02 p.m. UTC | #5
Am 22.08.19 um 15:06 schrieb Daniel Vetter:
> On Thu, Aug 22, 2019 at 07:56:56AM +0000, Koenig, Christian wrote:
>> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>
>>> Assuming I didn't screw up anything with my audit of course.
>>>
>>> v2:
>>> - Dont forget wu_mutex (Christian König)
>>> - Keep the mmap_sem-less wait optimization (Thomas)
>>> - Use _lock_interruptible to be good citizens (Thomas)
>>>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> btw I realized I didn't remove your r-b, since v1 was broken.
>
> For formality, can you pls reaffirm, or still something broken?

My r-b is still valid.

Only problem I see is that neither of us seems to have a good idea about 
the different VM_FAULT_* replies.

But that worked before so it should still work now,
Christian.

>
> Also from the other thread: Reviewed-by: Thomas Hellström <thellstrom@vmware.com>
>
> Thanks, Daniel
>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Cc: Huang Rui <ray.huang@amd.com>
>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
>>>    drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
>>>    include/drm/ttm/ttm_bo_api.h      |  4 ----
>>>    4 files changed, 5 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 20ff56f27aa4..d1ce5d315d5b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
>>>    	dma_fence_put(bo->moving);
>>>    	if (!ttm_bo_uses_embedded_gem_object(bo))
>>>    		dma_resv_fini(&bo->base._resv);
>>> -	mutex_destroy(&bo->wu_mutex);
>>>    	bo->destroy(bo);
>>>    	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
>>>    }
>>> @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>>>    	INIT_LIST_HEAD(&bo->ddestroy);
>>>    	INIT_LIST_HEAD(&bo->swap);
>>>    	INIT_LIST_HEAD(&bo->io_reserve_lru);
>>> -	mutex_init(&bo->wu_mutex);
>>>    	bo->bdev = bdev;
>>>    	bo->type = type;
>>>    	bo->num_pages = num_pages;
>>> @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>    		;
>>>    }
>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
>>> -
>>> -/**
>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>> - * unreserved
>>> - *
>>> - * @bo: Pointer to buffer
>>> - */
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>> -{
>>> -	int ret;
>>> -
>>> -	/*
>>> -	 * In the absense of a wait_unlocked API,
>>> -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
>>> -	 * concurrent use of this function. Note that this use of
>>> -	 * bo::wu_mutex can go away if we change locking order to
>>> -	 * mmap_sem -> bo::reserve.
>>> -	 */
>>> -	ret = mutex_lock_interruptible(&bo->wu_mutex);
>>> -	if (unlikely(ret != 0))
>>> -		return -ERESTARTSYS;
>>> -	if (!dma_resv_is_locked(bo->base.resv))
>>> -		goto out_unlock;
>>> -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>> -	if (ret == -EINTR)
>>> -		ret = -ERESTARTSYS;
>>> -	if (unlikely(ret != 0))
>>> -		goto out_unlock;
>>> -	dma_resv_unlock(bo->base.resv);
>>> -
>>> -out_unlock:
>>> -	mutex_unlock(&bo->wu_mutex);
>>> -	return ret;
>>> -}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> index fe81c565e7ef..82ea26a49959 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>> @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>>>    	INIT_LIST_HEAD(&fbo->base.lru);
>>>    	INIT_LIST_HEAD(&fbo->base.swap);
>>>    	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
>>> -	mutex_init(&fbo->base.wu_mutex);
>>>    	fbo->base.moving = NULL;
>>>    	drm_vma_node_reset(&fbo->base.base.vma_node);
>>>    	atomic_set(&fbo->base.cpu_writers, 0);
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> index 76eedb963693..a61a35e57d1c 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>    		&bdev->man[bo->mem.mem_type];
>>>    	struct vm_area_struct cvma;
>>>    
>>> -	/*
>>> -	 * Work around locking order reversal in fault / nopfn
>>> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
>>> -	 * for reserve, and if it fails, retry the fault after waiting
>>> -	 * for the buffer to become unreserved.
>>> -	 */
>>>    	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>    		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>    			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>> Not an expert on fault handling, but shouldn't this now be one if?
>>
>> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE
>> instead of VM_FAULT_RETRY.
>>
>> But really take that with a grain of salt,
>> Christian.
>>
>>>    				ttm_bo_get(bo);
>>>    				up_read(&vmf->vma->vm_mm->mmap_sem);
>>> -				(void) ttm_bo_wait_unreserved(bo);
>>> +				if (!dma_resv_lock_interruptible(bo->base.resv,
>>> +								 NULL))
>>> +					dma_resv_unlock(bo->base.resv);
>>>    				ttm_bo_put(bo);
>>>    			}
>>>    
>>>    			return VM_FAULT_RETRY;
>>>    		}
>>>    
>>> -		/*
>>> -		 * If we'd want to change locking order to
>>> -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>> -		 * instead of retrying the fault...
>>> -		 */
>>> -		return VM_FAULT_NOPAGE;
>>> +		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
>>> +			return VM_FAULT_NOPAGE;
>>>    	}
>>>    
>>>    	/*
>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>> index 43c4929a2171..21c7d0d28757 100644
>>> --- a/include/drm/ttm/ttm_bo_api.h
>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>> @@ -155,7 +155,6 @@ struct ttm_tt;
>>>     * @offset: The current GPU offset, which can have different meanings
>>>     * depending on the memory type. For SYSTEM type memory, it should be 0.
>>>     * @cur_placement: Hint of current placement.
>>> - * @wu_mutex: Wait unreserved mutex.
>>>     *
>>>     * Base class for TTM buffer object, that deals with data placement and CPU
>>>     * mappings. GPU mappings are really up to the driver, but for simpler GPUs
>>> @@ -229,8 +228,6 @@ struct ttm_buffer_object {
>>>    	uint64_t offset; /* GPU address space is independent of CPU word size */
>>>    
>>>    	struct sg_table *sg;
>>> -
>>> -	struct mutex wu_mutex;
>>>    };
>>>    
>>>    /**
>>> @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>    int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>    			struct ttm_operation_ctx *ctx);
>>>    void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>    
>>>    /**
>>>     * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
Thomas Hellström (Intel) Aug. 22, 2019, 2:24 p.m. UTC | #6
On 8/22/19 4:02 PM, Koenig, Christian wrote:
> Am 22.08.19 um 15:06 schrieb Daniel Vetter:
>> On Thu, Aug 22, 2019 at 07:56:56AM +0000, Koenig, Christian wrote:
>>> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>
>>>> Assuming I didn't screw up anything with my audit of course.
>>>>
>>>> v2:
>>>> - Dont forget wu_mutex (Christian König)
>>>> - Keep the mmap_sem-less wait optimization (Thomas)
>>>> - Use _lock_interruptible to be good citizens (Thomas)
>>>>
>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>> btw I realized I didn't remove your r-b, since v1 was broken.
>>
>> For formality, can you pls reaffirm, or still something broken?
> My r-b is still valid.
>
> Only problem I see is that neither of us seems to have a good idea about
> the different VM_FAULT_* replies.

I took a look in mm/gup.c. It seems like when using get_user_pages, 
VM_FAULT_RETRY will retry to a requesting caller telling it that a long 
wait was expected and not performed, whereas VM_FAULT_NOPAGE will just 
keep get_user_pages to spin. So the proposed patch should be correct 
from my understanding.

If the fault originates from user-space, I guess either is fine.

/Thomas


>
> But that worked before so it should still work now,
> Christian.
>
>> Also from the other thread: Reviewed-by: Thomas Hellström <thellstrom@vmware.com>
>>
>> Thanks, Daniel
>>
>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>> ---
>>>>     drivers/gpu/drm/ttm/ttm_bo.c      | 36 -------------------------------
>>>>     drivers/gpu/drm/ttm/ttm_bo_util.c |  1 -
>>>>     drivers/gpu/drm/ttm/ttm_bo_vm.c   | 18 +++++-----------
>>>>     include/drm/ttm/ttm_bo_api.h      |  4 ----
>>>>     4 files changed, 5 insertions(+), 54 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 20ff56f27aa4..d1ce5d315d5b 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -162,7 +162,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
>>>>     	dma_fence_put(bo->moving);
>>>>     	if (!ttm_bo_uses_embedded_gem_object(bo))
>>>>     		dma_resv_fini(&bo->base._resv);
>>>> -	mutex_destroy(&bo->wu_mutex);
>>>>     	bo->destroy(bo);
>>>>     	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
>>>>     }
>>>> @@ -1319,7 +1318,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>>>>     	INIT_LIST_HEAD(&bo->ddestroy);
>>>>     	INIT_LIST_HEAD(&bo->swap);
>>>>     	INIT_LIST_HEAD(&bo->io_reserve_lru);
>>>> -	mutex_init(&bo->wu_mutex);
>>>>     	bo->bdev = bdev;
>>>>     	bo->type = type;
>>>>     	bo->num_pages = num_pages;
>>>> @@ -1954,37 +1952,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>>     		;
>>>>     }
>>>>     EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>> -
>>>> -/**
>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>>> - * unreserved
>>>> - *
>>>> - * @bo: Pointer to buffer
>>>> - */
>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>> -{
>>>> -	int ret;
>>>> -
>>>> -	/*
>>>> -	 * In the absense of a wait_unlocked API,
>>>> -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>> -	 * concurrent use of this function. Note that this use of
>>>> -	 * bo::wu_mutex can go away if we change locking order to
>>>> -	 * mmap_sem -> bo::reserve.
>>>> -	 */
>>>> -	ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>> -	if (unlikely(ret != 0))
>>>> -		return -ERESTARTSYS;
>>>> -	if (!dma_resv_is_locked(bo->base.resv))
>>>> -		goto out_unlock;
>>>> -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>> -	if (ret == -EINTR)
>>>> -		ret = -ERESTARTSYS;
>>>> -	if (unlikely(ret != 0))
>>>> -		goto out_unlock;
>>>> -	dma_resv_unlock(bo->base.resv);
>>>> -
>>>> -out_unlock:
>>>> -	mutex_unlock(&bo->wu_mutex);
>>>> -	return ret;
>>>> -}
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>>> index fe81c565e7ef..82ea26a49959 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>>>> @@ -508,7 +508,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>>>>     	INIT_LIST_HEAD(&fbo->base.lru);
>>>>     	INIT_LIST_HEAD(&fbo->base.swap);
>>>>     	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
>>>> -	mutex_init(&fbo->base.wu_mutex);
>>>>     	fbo->base.moving = NULL;
>>>>     	drm_vma_node_reset(&fbo->base.base.vma_node);
>>>>     	atomic_set(&fbo->base.cpu_writers, 0);
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> index 76eedb963693..a61a35e57d1c 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> @@ -125,30 +125,22 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>>     		&bdev->man[bo->mem.mem_type];
>>>>     	struct vm_area_struct cvma;
>>>>     
>>>> -	/*
>>>> -	 * Work around locking order reversal in fault / nopfn
>>>> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
>>>> -	 * for reserve, and if it fails, retry the fault after waiting
>>>> -	 * for the buffer to become unreserved.
>>>> -	 */
>>>>     	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>>     		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>>     			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>> Not an expert on fault handling, but shouldn't this now be one if?
>>>
>>> E.g. if FAULT_FLAG_RETRY_NOWAIT is set we should return VM_FAULT_NOPAGE
>>> instead of VM_FAULT_RETRY.
>>>
>>> But really take that with a grain of salt,
>>> Christian.
>>>
>>>>     				ttm_bo_get(bo);
>>>>     				up_read(&vmf->vma->vm_mm->mmap_sem);
>>>> -				(void) ttm_bo_wait_unreserved(bo);
>>>> +				if (!dma_resv_lock_interruptible(bo->base.resv,
>>>> +								 NULL))
>>>> +					dma_resv_unlock(bo->base.resv);
>>>>     				ttm_bo_put(bo);
>>>>     			}
>>>>     
>>>>     			return VM_FAULT_RETRY;
>>>>     		}
>>>>     
>>>> -		/*
>>>> -		 * If we'd want to change locking order to
>>>> -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>> -		 * instead of retrying the fault...
>>>> -		 */
>>>> -		return VM_FAULT_NOPAGE;
>>>> +		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
>>>> +			return VM_FAULT_NOPAGE;
>>>>     	}
>>>>     
>>>>     	/*
>>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>>> index 43c4929a2171..21c7d0d28757 100644
>>>> --- a/include/drm/ttm/ttm_bo_api.h
>>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>>> @@ -155,7 +155,6 @@ struct ttm_tt;
>>>>      * @offset: The current GPU offset, which can have different meanings
>>>>      * depending on the memory type. For SYSTEM type memory, it should be 0.
>>>>      * @cur_placement: Hint of current placement.
>>>> - * @wu_mutex: Wait unreserved mutex.
>>>>      *
>>>>      * Base class for TTM buffer object, that deals with data placement and CPU
>>>>      * mappings. GPU mappings are really up to the driver, but for simpler GPUs
>>>> @@ -229,8 +228,6 @@ struct ttm_buffer_object {
>>>>     	uint64_t offset; /* GPU address space is independent of CPU word size */
>>>>     
>>>>     	struct sg_table *sg;
>>>> -
>>>> -	struct mutex wu_mutex;
>>>>     };
>>>>     
>>>>     /**
>>>> @@ -765,7 +762,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>>     int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>>     			struct ttm_operation_ctx *ctx);
>>>>     void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>>     
>>>>     /**
>>>>      * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Hellström (Intel) Aug. 22, 2019, 2:30 p.m. UTC | #7
On 8/22/19 4:24 PM, Thomas Hellström (VMware) wrote:
> On 8/22/19 4:02 PM, Koenig, Christian wrote:
>> Am 22.08.19 um 15:06 schrieb Daniel Vetter:
>>> On Thu, Aug 22, 2019 at 07:56:56AM +0000, Koenig, Christian wrote:
>>>> Am 22.08.19 um 08:49 schrieb Daniel Vetter:
>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>>
>>>>> Assuming I didn't screw up anything with my audit of course.
>>>>>
>>>>> v2:
>>>>> - Dont forget wu_mutex (Christian König)
>>>>> - Keep the mmap_sem-less wait optimization (Thomas)
>>>>> - Use _lock_interruptible to be good citizens (Thomas)
>>>>>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>> btw I realized I didn't remove your r-b, since v1 was broken.
>>>
>>> For formality, can you pls reaffirm, or still something broken?
>> My r-b is still valid.
>>
>> Only problem I see is that neither of us seems to have a good idea about
>> the different VM_FAULT_* replies.
>
> I took a look in mm/gup.c. It seems like when using get_user_pages, 
> VM_FAULT_RETRY will retry 

s/retry/return/


> to a requesting caller telling it that a long wait was expected and 
> not performed, whereas VM_FAULT_NOPAGE will just keep get_user_pages 
> to spin. So the proposed patch should be correct from my understanding.
>
> If the fault originates from user-space, I guess either is fine.
>
> /Thomas
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 20ff56f27aa4..d1ce5d315d5b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -162,7 +162,6 @@  static void ttm_bo_release_list(struct kref *list_kref)
 	dma_fence_put(bo->moving);
 	if (!ttm_bo_uses_embedded_gem_object(bo))
 		dma_resv_fini(&bo->base._resv);
-	mutex_destroy(&bo->wu_mutex);
 	bo->destroy(bo);
 	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
 }
@@ -1319,7 +1318,6 @@  int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bo->ddestroy);
 	INIT_LIST_HEAD(&bo->swap);
 	INIT_LIST_HEAD(&bo->io_reserve_lru);
-	mutex_init(&bo->wu_mutex);
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->num_pages = num_pages;
@@ -1954,37 +1952,3 @@  void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
 		;
 }
 EXPORT_SYMBOL(ttm_bo_swapout_all);
-
-/**
- * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
- * unreserved
- *
- * @bo: Pointer to buffer
- */
-int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
-{
-	int ret;
-
-	/*
-	 * In the absense of a wait_unlocked API,
-	 * Use the bo::wu_mutex to avoid triggering livelocks due to
-	 * concurrent use of this function. Note that this use of
-	 * bo::wu_mutex can go away if we change locking order to
-	 * mmap_sem -> bo::reserve.
-	 */
-	ret = mutex_lock_interruptible(&bo->wu_mutex);
-	if (unlikely(ret != 0))
-		return -ERESTARTSYS;
-	if (!dma_resv_is_locked(bo->base.resv))
-		goto out_unlock;
-	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
-	if (ret == -EINTR)
-		ret = -ERESTARTSYS;
-	if (unlikely(ret != 0))
-		goto out_unlock;
-	dma_resv_unlock(bo->base.resv);
-
-out_unlock:
-	mutex_unlock(&bo->wu_mutex);
-	return ret;
-}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index fe81c565e7ef..82ea26a49959 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -508,7 +508,6 @@  static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->base.lru);
 	INIT_LIST_HEAD(&fbo->base.swap);
 	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
-	mutex_init(&fbo->base.wu_mutex);
 	fbo->base.moving = NULL;
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 	atomic_set(&fbo->base.cpu_writers, 0);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 76eedb963693..a61a35e57d1c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -125,30 +125,22 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		&bdev->man[bo->mem.mem_type];
 	struct vm_area_struct cvma;
 
-	/*
-	 * Work around locking order reversal in fault / nopfn
-	 * between mmap_sem and bo_reserve: Perform a trylock operation
-	 * for reserve, and if it fails, retry the fault after waiting
-	 * for the buffer to become unreserved.
-	 */
 	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
 		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
 			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
 				ttm_bo_get(bo);
 				up_read(&vmf->vma->vm_mm->mmap_sem);
-				(void) ttm_bo_wait_unreserved(bo);
+				if (!dma_resv_lock_interruptible(bo->base.resv,
+								 NULL))
+					dma_resv_unlock(bo->base.resv);
 				ttm_bo_put(bo);
 			}
 
 			return VM_FAULT_RETRY;
 		}
 
-		/*
-		 * If we'd want to change locking order to
-		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
-		 * instead of retrying the fault...
-		 */
-		return VM_FAULT_NOPAGE;
+		if (dma_resv_lock_interruptible(bo->base.resv, NULL))
+			return VM_FAULT_NOPAGE;
 	}
 
 	/*
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 43c4929a2171..21c7d0d28757 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -155,7 +155,6 @@  struct ttm_tt;
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
  * @cur_placement: Hint of current placement.
- * @wu_mutex: Wait unreserved mutex.
  *
  * Base class for TTM buffer object, that deals with data placement and CPU
  * mappings. GPU mappings are really up to the driver, but for simpler GPUs
@@ -229,8 +228,6 @@  struct ttm_buffer_object {
 	uint64_t offset; /* GPU address space is independent of CPU word size */
 
 	struct sg_table *sg;
-
-	struct mutex wu_mutex;
 };
 
 /**
@@ -765,7 +762,6 @@  ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
 int ttm_bo_swapout(struct ttm_bo_global *glob,
 			struct ttm_operation_ctx *ctx);
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
-int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the