[3/3] drm/ttm: remove ttm_bo_wait_unreserved
diff mbox series

Message ID 20190820145336.15649-4-daniel.vetter@ffwll.ch
State New
Headers show
Series
  • RFC/T: dma_resv vs. mmap_sem
Related show

Commit Message

Daniel Vetter Aug. 20, 2019, 2:53 p.m. UTC
With nouveau fixed all ttm-using drives have the correct nesting of
mmap_sem vs dma_resv, and we can just lock the buffer.

Assuming I didn't screw up anything with my audit of course.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
 include/drm/ttm/ttm_bo_api.h    |  1 -
 3 files changed, 1 insertion(+), 60 deletions(-)

Comments

Koenig, Christian Aug. 20, 2019, 3:16 p.m. UTC | #1
Am 20.08.19 um 16:53 schrieb Daniel Vetter:
> With nouveau fixed all ttm-using drives have the correct nesting of
> mmap_sem vs dma_resv, and we can just lock the buffer.
>
> Assuming I didn't screw up anything with my audit of course.
>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Huang Rui <ray.huang@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> Cc: Thomas Hellstrom <thellstrom@vmware.com>

Yes, please. But one more point: you can now remove bo->wu_mutex as well!

Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>   include/drm/ttm/ttm_bo_api.h    |  1 -
>   3 files changed, 1 insertion(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 20ff56f27aa4..a952dd624b06 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>   		;
>   }
>   EXPORT_SYMBOL(ttm_bo_swapout_all);
> -
> -/**
> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> - * unreserved
> - *
> - * @bo: Pointer to buffer
> - */
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> -{
> -	int ret;
> -
> -	/*
> -	 * In the absense of a wait_unlocked API,
> -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
> -	 * concurrent use of this function. Note that this use of
> -	 * bo::wu_mutex can go away if we change locking order to
> -	 * mmap_sem -> bo::reserve.
> -	 */
> -	ret = mutex_lock_interruptible(&bo->wu_mutex);
> -	if (unlikely(ret != 0))
> -		return -ERESTARTSYS;
> -	if (!dma_resv_is_locked(bo->base.resv))
> -		goto out_unlock;
> -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> -	if (ret == -EINTR)
> -		ret = -ERESTARTSYS;
> -	if (unlikely(ret != 0))
> -		goto out_unlock;
> -	dma_resv_unlock(bo->base.resv);
> -
> -out_unlock:
> -	mutex_unlock(&bo->wu_mutex);
> -	return ret;
> -}
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 76eedb963693..505e1787aeea 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		&bdev->man[bo->mem.mem_type];
>   	struct vm_area_struct cvma;
>   
> -	/*
> -	 * Work around locking order reversal in fault / nopfn
> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
> -	 * for reserve, and if it fails, retry the fault after waiting
> -	 * for the buffer to become unreserved.
> -	 */
> -	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> -		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> -			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> -				ttm_bo_get(bo);
> -				up_read(&vmf->vma->vm_mm->mmap_sem);
> -				(void) ttm_bo_wait_unreserved(bo);
> -				ttm_bo_put(bo);
> -			}
> -
> -			return VM_FAULT_RETRY;
> -		}
> -
> -		/*
> -		 * If we'd want to change locking order to
> -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> -		 * instead of retrying the fault...
> -		 */
> -		return VM_FAULT_NOPAGE;
> -	}
> +	dma_resv_lock(bo->base.resv, NULL);
>   
>   	/*
>   	 * Refuse to fault imported pages. This should be handled
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 43c4929a2171..6b50e624e3e2 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>   int ttm_bo_swapout(struct ttm_bo_global *glob,
>   			struct ttm_operation_ctx *ctx);
>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>   
>   /**
>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
Daniel Vetter Aug. 20, 2019, 3:21 p.m. UTC | #2
On Tue, Aug 20, 2019 at 5:16 PM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
>
> Am 20.08.19 um 16:53 schrieb Daniel Vetter:
> > With nouveau fixed all ttm-using drives have the correct nesting of
> > mmap_sem vs dma_resv, and we can just lock the buffer.
> >
> > Assuming I didn't screw up anything with my audit of course.
> >
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Cc: Huang Rui <ray.huang@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> > Cc: Thomas Hellstrom <thellstrom@vmware.com>
>
> Yes, please. But one more point: you can now remove bo->wu_mutex as well!

Ah right totally forgot about that in my enthusiasm after all the
auditing and fixing nouveau.

> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>

Thanks, I already respun the patches, so will be in the next version.
Can you pls also give this a spin on the amdgpu CI, just to make sure
it's all fine? With full lockdep ofc. And then reply with a t-b.

Thanks, Daniel
>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >   include/drm/ttm/ttm_bo_api.h    |  1 -
> >   3 files changed, 1 insertion(+), 60 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 20ff56f27aa4..a952dd624b06 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
> >               ;
> >   }
> >   EXPORT_SYMBOL(ttm_bo_swapout_all);
> > -
> > -/**
> > - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> > - * unreserved
> > - *
> > - * @bo: Pointer to buffer
> > - */
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> > -{
> > -     int ret;
> > -
> > -     /*
> > -      * In the absense of a wait_unlocked API,
> > -      * Use the bo::wu_mutex to avoid triggering livelocks due to
> > -      * concurrent use of this function. Note that this use of
> > -      * bo::wu_mutex can go away if we change locking order to
> > -      * mmap_sem -> bo::reserve.
> > -      */
> > -     ret = mutex_lock_interruptible(&bo->wu_mutex);
> > -     if (unlikely(ret != 0))
> > -             return -ERESTARTSYS;
> > -     if (!dma_resv_is_locked(bo->base.resv))
> > -             goto out_unlock;
> > -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> > -     if (ret == -EINTR)
> > -             ret = -ERESTARTSYS;
> > -     if (unlikely(ret != 0))
> > -             goto out_unlock;
> > -     dma_resv_unlock(bo->base.resv);
> > -
> > -out_unlock:
> > -     mutex_unlock(&bo->wu_mutex);
> > -     return ret;
> > -}
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index 76eedb963693..505e1787aeea 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> >               &bdev->man[bo->mem.mem_type];
> >       struct vm_area_struct cvma;
> >
> > -     /*
> > -      * Work around locking order reversal in fault / nopfn
> > -      * between mmap_sem and bo_reserve: Perform a trylock operation
> > -      * for reserve, and if it fails, retry the fault after waiting
> > -      * for the buffer to become unreserved.
> > -      */
> > -     if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> > -             if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> > -                     if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> > -                             ttm_bo_get(bo);
> > -                             up_read(&vmf->vma->vm_mm->mmap_sem);
> > -                             (void) ttm_bo_wait_unreserved(bo);
> > -                             ttm_bo_put(bo);
> > -                     }
> > -
> > -                     return VM_FAULT_RETRY;
> > -             }
> > -
> > -             /*
> > -              * If we'd want to change locking order to
> > -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> > -              * instead of retrying the fault...
> > -              */
> > -             return VM_FAULT_NOPAGE;
> > -     }
> > +     dma_resv_lock(bo->base.resv, NULL);
> >
> >       /*
> >        * Refuse to fault imported pages. This should be handled
> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> > index 43c4929a2171..6b50e624e3e2 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
> >   int ttm_bo_swapout(struct ttm_bo_global *glob,
> >                       struct ttm_operation_ctx *ctx);
> >   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> > -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> >
> >   /**
> >    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Koenig, Christian Aug. 20, 2019, 3:34 p.m. UTC | #3
Am 20.08.19 um 17:21 schrieb Daniel Vetter:
> On Tue, Aug 20, 2019 at 5:16 PM Koenig, Christian
> <Christian.Koenig@amd.com> wrote:
>> Am 20.08.19 um 16:53 schrieb Daniel Vetter:
>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>
>>> Assuming I didn't screw up anything with my audit of course.
>>>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Cc: Huang Rui <ray.huang@amd.com>
>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>> Yes, please. But one more point: you can now remove bo->wu_mutex as well!
> Ah right totally forgot about that in my enthusiasm after all the
> auditing and fixing nouveau.
>
>> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>
> Thanks, I already respun the patches, so will be in the next version.
> Can you pls also give this a spin on the amdgpu CI, just to make sure
> it's all fine? With full lockdep ofc. And then reply with a t-b.

I can ask for this on our call tomorrow, but I fear our CI 
infrastructure is not ready yet.

Christian.

>
> Thanks, Daniel
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>    include/drm/ttm/ttm_bo_api.h    |  1 -
>>>    3 files changed, 1 insertion(+), 60 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 20ff56f27aa4..a952dd624b06 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>                ;
>>>    }
>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
>>> -
>>> -/**
>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>> - * unreserved
>>> - *
>>> - * @bo: Pointer to buffer
>>> - */
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>> -{
>>> -     int ret;
>>> -
>>> -     /*
>>> -      * In the absense of a wait_unlocked API,
>>> -      * Use the bo::wu_mutex to avoid triggering livelocks due to
>>> -      * concurrent use of this function. Note that this use of
>>> -      * bo::wu_mutex can go away if we change locking order to
>>> -      * mmap_sem -> bo::reserve.
>>> -      */
>>> -     ret = mutex_lock_interruptible(&bo->wu_mutex);
>>> -     if (unlikely(ret != 0))
>>> -             return -ERESTARTSYS;
>>> -     if (!dma_resv_is_locked(bo->base.resv))
>>> -             goto out_unlock;
>>> -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>> -     if (ret == -EINTR)
>>> -             ret = -ERESTARTSYS;
>>> -     if (unlikely(ret != 0))
>>> -             goto out_unlock;
>>> -     dma_resv_unlock(bo->base.resv);
>>> -
>>> -out_unlock:
>>> -     mutex_unlock(&bo->wu_mutex);
>>> -     return ret;
>>> -}
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> index 76eedb963693..505e1787aeea 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>                &bdev->man[bo->mem.mem_type];
>>>        struct vm_area_struct cvma;
>>>
>>> -     /*
>>> -      * Work around locking order reversal in fault / nopfn
>>> -      * between mmap_sem and bo_reserve: Perform a trylock operation
>>> -      * for reserve, and if it fails, retry the fault after waiting
>>> -      * for the buffer to become unreserved.
>>> -      */
>>> -     if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>> -             if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>> -                     if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>> -                             ttm_bo_get(bo);
>>> -                             up_read(&vmf->vma->vm_mm->mmap_sem);
>>> -                             (void) ttm_bo_wait_unreserved(bo);
>>> -                             ttm_bo_put(bo);
>>> -                     }
>>> -
>>> -                     return VM_FAULT_RETRY;
>>> -             }
>>> -
>>> -             /*
>>> -              * If we'd want to change locking order to
>>> -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>> -              * instead of retrying the fault...
>>> -              */
>>> -             return VM_FAULT_NOPAGE;
>>> -     }
>>> +     dma_resv_lock(bo->base.resv, NULL);
>>>
>>>        /*
>>>         * Refuse to fault imported pages. This should be handled
>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>> index 43c4929a2171..6b50e624e3e2 100644
>>> --- a/include/drm/ttm/ttm_bo_api.h
>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>    int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>                        struct ttm_operation_ctx *ctx);
>>>    void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>
>>>    /**
>>>     * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Daniel Vetter Aug. 20, 2019, 3:41 p.m. UTC | #4
On Tue, Aug 20, 2019 at 5:34 PM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
>
> Am 20.08.19 um 17:21 schrieb Daniel Vetter:
> > On Tue, Aug 20, 2019 at 5:16 PM Koenig, Christian
> > <Christian.Koenig@amd.com> wrote:
> >> Am 20.08.19 um 16:53 schrieb Daniel Vetter:
> >>> With nouveau fixed all ttm-using drives have the correct nesting of
> >>> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>>
> >>> Assuming I didn't screw up anything with my audit of course.
> >>>
> >>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>> Cc: Huang Rui <ray.huang@amd.com>
> >>> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >> Yes, please. But one more point: you can now remove bo->wu_mutex as well!
> > Ah right totally forgot about that in my enthusiasm after all the
> > auditing and fixing nouveau.
> >
> >> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>
> > Thanks, I already respun the patches, so will be in the next version.
> > Can you pls also give this a spin on the amdgpu CI, just to make sure
> > it's all fine? With full lockdep ofc. And then reply with a t-b.
>
> I can ask for this on our call tomorrow, but I fear our CI
> infrastructure is not ready yet.

I thought you have some internal branch you all commit amdgpu stuff
for, and then Alex goes and collects the pieces that are ready? Or
does that all blow up if you push a patch which touches code outside
of the dkms?
-Daniel

>
> Christian.
>
> >
> > Thanks, Daniel
> >>> ---
> >>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>>    include/drm/ttm/ttm_bo_api.h    |  1 -
> >>>    3 files changed, 1 insertion(+), 60 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >>> index 20ff56f27aa4..a952dd624b06 100644
> >>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
> >>>                ;
> >>>    }
> >>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
> >>> -
> >>> -/**
> >>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> >>> - * unreserved
> >>> - *
> >>> - * @bo: Pointer to buffer
> >>> - */
> >>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> >>> -{
> >>> -     int ret;
> >>> -
> >>> -     /*
> >>> -      * In the absense of a wait_unlocked API,
> >>> -      * Use the bo::wu_mutex to avoid triggering livelocks due to
> >>> -      * concurrent use of this function. Note that this use of
> >>> -      * bo::wu_mutex can go away if we change locking order to
> >>> -      * mmap_sem -> bo::reserve.
> >>> -      */
> >>> -     ret = mutex_lock_interruptible(&bo->wu_mutex);
> >>> -     if (unlikely(ret != 0))
> >>> -             return -ERESTARTSYS;
> >>> -     if (!dma_resv_is_locked(bo->base.resv))
> >>> -             goto out_unlock;
> >>> -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> >>> -     if (ret == -EINTR)
> >>> -             ret = -ERESTARTSYS;
> >>> -     if (unlikely(ret != 0))
> >>> -             goto out_unlock;
> >>> -     dma_resv_unlock(bo->base.resv);
> >>> -
> >>> -out_unlock:
> >>> -     mutex_unlock(&bo->wu_mutex);
> >>> -     return ret;
> >>> -}
> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>> index 76eedb963693..505e1787aeea 100644
> >>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> >>>                &bdev->man[bo->mem.mem_type];
> >>>        struct vm_area_struct cvma;
> >>>
> >>> -     /*
> >>> -      * Work around locking order reversal in fault / nopfn
> >>> -      * between mmap_sem and bo_reserve: Perform a trylock operation
> >>> -      * for reserve, and if it fails, retry the fault after waiting
> >>> -      * for the buffer to become unreserved.
> >>> -      */
> >>> -     if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >>> -             if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >>> -                     if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> >>> -                             ttm_bo_get(bo);
> >>> -                             up_read(&vmf->vma->vm_mm->mmap_sem);
> >>> -                             (void) ttm_bo_wait_unreserved(bo);
> >>> -                             ttm_bo_put(bo);
> >>> -                     }
> >>> -
> >>> -                     return VM_FAULT_RETRY;
> >>> -             }
> >>> -
> >>> -             /*
> >>> -              * If we'd want to change locking order to
> >>> -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> >>> -              * instead of retrying the fault...
> >>> -              */
> >>> -             return VM_FAULT_NOPAGE;
> >>> -     }
> >>> +     dma_resv_lock(bo->base.resv, NULL);
> >>>
> >>>        /*
> >>>         * Refuse to fault imported pages. This should be handled
> >>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> >>> index 43c4929a2171..6b50e624e3e2 100644
> >>> --- a/include/drm/ttm/ttm_bo_api.h
> >>> +++ b/include/drm/ttm/ttm_bo_api.h
> >>> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
> >>>    int ttm_bo_swapout(struct ttm_bo_global *glob,
> >>>                        struct ttm_operation_ctx *ctx);
> >>>    void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> >>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> >>>
> >>>    /**
> >>>     * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> >
>
Koenig, Christian Aug. 20, 2019, 3:45 p.m. UTC | #5
Am 20.08.19 um 17:41 schrieb Daniel Vetter:
> On Tue, Aug 20, 2019 at 5:34 PM Koenig, Christian
> <Christian.Koenig@amd.com> wrote:
>> Am 20.08.19 um 17:21 schrieb Daniel Vetter:
>>> On Tue, Aug 20, 2019 at 5:16 PM Koenig, Christian
>>> <Christian.Koenig@amd.com> wrote:
>>>> Am 20.08.19 um 16:53 schrieb Daniel Vetter:
>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>>
>>>>> Assuming I didn't screw up anything with my audit of course.
>>>>>
>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>> Yes, please. But one more point: you can now remove bo->wu_mutex as well!
>>> Ah right totally forgot about that in my enthusiasm after all the
>>> auditing and fixing nouveau.
>>>
>>>> Apart from that Reviewed-by: Christian König <christian.koenig@amd.com>
>>> Thanks, I already respun the patches, so will be in the next version.
>>> Can you pls also give this a spin on the amdgpu CI, just to make sure
>>> it's all fine? With full lockdep ofc. And then reply with a t-b.
>> I can ask for this on our call tomorrow, but I fear our CI
>> infrastructure is not ready yet.
> I thought you have some internal branch you all commit amdgpu stuff
> for, and then Alex goes and collects the pieces that are ready?

No, that part is correct. The problem is that this branch is not QA 
tested regularly as far as I know.

> Or does that all blow up if you push a patch which touches code outside
> of the dkms?

No, but the problem is related to that.

See the release branches for dkms are separate and indeed QA tested 
regularly.

But changes from amd-staging-drm-next are only cherry picked into those 
in certain intervals.

Well going to discuss that tomorrow,
Christian.

> -Daniel
>
>> Christian.
>>
>>> Thanks, Daniel
>>>>> ---
>>>>>     drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>>>     drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>>>     include/drm/ttm/ttm_bo_api.h    |  1 -
>>>>>     3 files changed, 1 insertion(+), 60 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> index 20ff56f27aa4..a952dd624b06 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>>>>>                 ;
>>>>>     }
>>>>>     EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>>> -
>>>>> -/**
>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
>>>>> - * unreserved
>>>>> - *
>>>>> - * @bo: Pointer to buffer
>>>>> - */
>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>>> -{
>>>>> -     int ret;
>>>>> -
>>>>> -     /*
>>>>> -      * In the absense of a wait_unlocked API,
>>>>> -      * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>>> -      * concurrent use of this function. Note that this use of
>>>>> -      * bo::wu_mutex can go away if we change locking order to
>>>>> -      * mmap_sem -> bo::reserve.
>>>>> -      */
>>>>> -     ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>>> -     if (unlikely(ret != 0))
>>>>> -             return -ERESTARTSYS;
>>>>> -     if (!dma_resv_is_locked(bo->base.resv))
>>>>> -             goto out_unlock;
>>>>> -     ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>>> -     if (ret == -EINTR)
>>>>> -             ret = -ERESTARTSYS;
>>>>> -     if (unlikely(ret != 0))
>>>>> -             goto out_unlock;
>>>>> -     dma_resv_unlock(bo->base.resv);
>>>>> -
>>>>> -out_unlock:
>>>>> -     mutex_unlock(&bo->wu_mutex);
>>>>> -     return ret;
>>>>> -}
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>> index 76eedb963693..505e1787aeea 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>>>                 &bdev->man[bo->mem.mem_type];
>>>>>         struct vm_area_struct cvma;
>>>>>
>>>>> -     /*
>>>>> -      * Work around locking order reversal in fault / nopfn
>>>>> -      * between mmap_sem and bo_reserve: Perform a trylock operation
>>>>> -      * for reserve, and if it fails, retry the fault after waiting
>>>>> -      * for the buffer to become unreserved.
>>>>> -      */
>>>>> -     if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>>> -             if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>>> -                     if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>>>> -                             ttm_bo_get(bo);
>>>>> -                             up_read(&vmf->vma->vm_mm->mmap_sem);
>>>>> -                             (void) ttm_bo_wait_unreserved(bo);
>>>>> -                             ttm_bo_put(bo);
>>>>> -                     }
>>>>> -
>>>>> -                     return VM_FAULT_RETRY;
>>>>> -             }
>>>>> -
>>>>> -             /*
>>>>> -              * If we'd want to change locking order to
>>>>> -              * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>>> -              * instead of retrying the fault...
>>>>> -              */
>>>>> -             return VM_FAULT_NOPAGE;
>>>>> -     }
>>>>> +     dma_resv_lock(bo->base.resv, NULL);
>>>>>
>>>>>         /*
>>>>>          * Refuse to fault imported pages. This should be handled
>>>>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>>>> index 43c4929a2171..6b50e624e3e2 100644
>>>>> --- a/include/drm/ttm/ttm_bo_api.h
>>>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>>>> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>>>>>     int ttm_bo_swapout(struct ttm_bo_global *glob,
>>>>>                         struct ttm_operation_ctx *ctx);
>>>>>     void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>>>>
>>>>>     /**
>>>>>      * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
Thomas Hellström (VMware) Aug. 21, 2019, 12:40 p.m. UTC | #6
On 8/20/19 4:53 PM, Daniel Vetter wrote:
> With nouveau fixed all ttm-using drives have the correct nesting of
> mmap_sem vs dma_resv, and we can just lock the buffer.
>
> Assuming I didn't screw up anything with my audit of course.
>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Huang Rui <ray.huang@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>   include/drm/ttm/ttm_bo_api.h    |  1 -
>   3 files changed, 1 insertion(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 20ff56f27aa4..a952dd624b06 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>   		;
>   }
>   EXPORT_SYMBOL(ttm_bo_swapout_all);
> -
> -/**
> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> - * unreserved
> - *
> - * @bo: Pointer to buffer
> - */
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> -{
> -	int ret;
> -
> -	/*
> -	 * In the absense of a wait_unlocked API,
> -	 * Use the bo::wu_mutex to avoid triggering livelocks due to
> -	 * concurrent use of this function. Note that this use of
> -	 * bo::wu_mutex can go away if we change locking order to
> -	 * mmap_sem -> bo::reserve.
> -	 */
> -	ret = mutex_lock_interruptible(&bo->wu_mutex);
> -	if (unlikely(ret != 0))
> -		return -ERESTARTSYS;
> -	if (!dma_resv_is_locked(bo->base.resv))
> -		goto out_unlock;
> -	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> -	if (ret == -EINTR)
> -		ret = -ERESTARTSYS;
> -	if (unlikely(ret != 0))
> -		goto out_unlock;
> -	dma_resv_unlock(bo->base.resv);
> -
> -out_unlock:
> -	mutex_unlock(&bo->wu_mutex);
> -	return ret;
> -}
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 76eedb963693..505e1787aeea 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		&bdev->man[bo->mem.mem_type];
>   	struct vm_area_struct cvma;
>   
> -	/*
> -	 * Work around locking order reversal in fault / nopfn
> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
> -	 * for reserve, and if it fails, retry the fault after waiting
> -	 * for the buffer to become unreserved.
> -	 */
> -	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> -		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> -			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> -				ttm_bo_get(bo);
> -				up_read(&vmf->vma->vm_mm->mmap_sem);
> -				(void) ttm_bo_wait_unreserved(bo);
> -				ttm_bo_put(bo);
> -			}
> -
> -			return VM_FAULT_RETRY;
> -		}
> -
> -		/*
> -		 * If we'd want to change locking order to
> -		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> -		 * instead of retrying the fault...
> -		 */

I think you should justify why the above code is removed, since the 
comments actually outlines what to do if we change locking order.

The code that's removed above is not for adjusting locking orders but to 
decrease the mm latency by releasing the mmap_sem while waiting for bo 
reserve which in turn might be waiting for GPU. At a minimum we should 
have a separate patch with justification.

Note that the caller here ensures locking progress by adjusting the 
RETRY flags after a retry.

/Thomas


> -		return VM_FAULT_NOPAGE;
> -	}
> +	dma_resv_lock(bo->base.resv, NULL);
>   
>   	/*
>   	 * Refuse to fault imported pages. This should be handled
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 43c4929a2171..6b50e624e3e2 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>   int ttm_bo_swapout(struct ttm_bo_global *glob,
>   			struct ttm_operation_ctx *ctx);
>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>   
>   /**
>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
Thomas Hellström (VMware) Aug. 21, 2019, 12:47 p.m. UTC | #7
On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>> With nouveau fixed all ttm-using drives have the correct nesting of
>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>
>> Assuming I didn't screw up anything with my audit of course.
>>
>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Cc: Huang Rui <ray.huang@amd.com>
>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>   include/drm/ttm/ttm_bo_api.h    |  1 -
>>   3 files changed, 1 insertion(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 20ff56f27aa4..a952dd624b06 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device 
>> *bdev)
>>           ;
>>   }
>>   EXPORT_SYMBOL(ttm_bo_swapout_all);
>> -
>> -/**
>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object 
>> to become
>> - * unreserved
>> - *
>> - * @bo: Pointer to buffer
>> - */
>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>> -{
>> -    int ret;
>> -
>> -    /*
>> -     * In the absense of a wait_unlocked API,
>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
>> -     * concurrent use of this function. Note that this use of
>> -     * bo::wu_mutex can go away if we change locking order to
>> -     * mmap_sem -> bo::reserve.
>> -     */
>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
>> -    if (unlikely(ret != 0))
>> -        return -ERESTARTSYS;
>> -    if (!dma_resv_is_locked(bo->base.resv))
>> -        goto out_unlock;
>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>> -    if (ret == -EINTR)
>> -        ret = -ERESTARTSYS;
>> -    if (unlikely(ret != 0))
>> -        goto out_unlock;
>> -    dma_resv_unlock(bo->base.resv);
>> -
>> -out_unlock:
>> -    mutex_unlock(&bo->wu_mutex);
>> -    return ret;
>> -}
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> index 76eedb963693..505e1787aeea 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct 
>> vm_fault *vmf)
>>           &bdev->man[bo->mem.mem_type];
>>       struct vm_area_struct cvma;
>>   -    /*
>> -     * Work around locking order reversal in fault / nopfn
>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
>> -     * for reserve, and if it fails, retry the fault after waiting
>> -     * for the buffer to become unreserved.
>> -     */
>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>> -                ttm_bo_get(bo);
>> - up_read(&vmf->vma->vm_mm->mmap_sem);
>> -                (void) ttm_bo_wait_unreserved(bo);
>> -                ttm_bo_put(bo);
>> -            }
>> -
>> -            return VM_FAULT_RETRY;
>> -        }
>> -
>> -        /*
>> -         * If we'd want to change locking order to
>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>> -         * instead of retrying the fault...
>> -         */
>
> I think you should justify why the above code is removed, since the 
> comments actually outlines what to do if we change locking order.
>
> The code that's removed above is not for adjusting locking orders but 
> to decrease the mm latency by releasing the mmap_sem while waiting for 
> bo reserve which in turn might be waiting for GPU. At a minimum we 
> should have a separate patch with justification.
>
> Note that the caller here ensures locking progress by adjusting the 
> RETRY flags after a retry.

And this last sentence also means we still can remove the wu-mutex when 
the locking order is changed, since the chance of livelock is goes away. 
IIRC only a single RETRY spin is allowed by the mm core.

/Thomas

>
> /Thomas
>
>
>> -        return VM_FAULT_NOPAGE;
>> -    }
>> +    dma_resv_lock(bo->base.resv, NULL);
>>         /*
>>        * Refuse to fault imported pages. This should be handled
>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> index 43c4929a2171..6b50e624e3e2 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, 
>> struct file *filp,
>>   int ttm_bo_swapout(struct ttm_bo_global *glob,
>>               struct ttm_operation_ctx *ctx);
>>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>>     /**
>>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>
>
Thomas Hellström (VMware) Aug. 21, 2019, 1:16 p.m. UTC | #8
On 8/20/19 4:53 PM, Daniel Vetter wrote:
> With nouveau fixed all ttm-using drives have the correct nesting of
> mmap_sem vs dma_resv, and we can just lock the buffer.
>
> Assuming I didn't screw up anything with my audit of course.
>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Huang Rui <ray.huang@amd.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>   include/drm/ttm/ttm_bo_api.h    |  1 -
>   3 files changed, 1 insertion(+), 60 deletions(-)
>
>
> +	dma_resv_lock(bo->base.resv, NULL);

interruptible, or at least killable. (IIRC think killable is the right 
choice in fault code, even if TTM initially implemented interruptible to 
get reasonable Xorg "silken mouse" latency).

Thanks,
/Thomas
Daniel Vetter Aug. 21, 2019, 2:09 p.m. UTC | #9
On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
> > On 8/20/19 4:53 PM, Daniel Vetter wrote:
> >> With nouveau fixed all ttm-using drives have the correct nesting of
> >> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>
> >> Assuming I didn't screw up anything with my audit of course.
> >>
> >> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >> Cc: Christian Koenig <christian.koenig@amd.com>
> >> Cc: Huang Rui <ray.huang@amd.com>
> >> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >> ---
> >>   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>   include/drm/ttm/ttm_bo_api.h    |  1 -
> >>   3 files changed, 1 insertion(+), 60 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 20ff56f27aa4..a952dd624b06 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
> >> *bdev)
> >>           ;
> >>   }
> >>   EXPORT_SYMBOL(ttm_bo_swapout_all);
> >> -
> >> -/**
> >> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
> >> to become
> >> - * unreserved
> >> - *
> >> - * @bo: Pointer to buffer
> >> - */
> >> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> >> -{
> >> -    int ret;
> >> -
> >> -    /*
> >> -     * In the absense of a wait_unlocked API,
> >> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
> >> -     * concurrent use of this function. Note that this use of
> >> -     * bo::wu_mutex can go away if we change locking order to
> >> -     * mmap_sem -> bo::reserve.
> >> -     */
> >> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
> >> -    if (unlikely(ret != 0))
> >> -        return -ERESTARTSYS;
> >> -    if (!dma_resv_is_locked(bo->base.resv))
> >> -        goto out_unlock;
> >> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> >> -    if (ret == -EINTR)
> >> -        ret = -ERESTARTSYS;
> >> -    if (unlikely(ret != 0))
> >> -        goto out_unlock;
> >> -    dma_resv_unlock(bo->base.resv);
> >> -
> >> -out_unlock:
> >> -    mutex_unlock(&bo->wu_mutex);
> >> -    return ret;
> >> -}
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >> index 76eedb963693..505e1787aeea 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
> >> vm_fault *vmf)
> >>           &bdev->man[bo->mem.mem_type];
> >>       struct vm_area_struct cvma;
> >>   -    /*
> >> -     * Work around locking order reversal in fault / nopfn
> >> -     * between mmap_sem and bo_reserve: Perform a trylock operation
> >> -     * for reserve, and if it fails, retry the fault after waiting
> >> -     * for the buffer to become unreserved.
> >> -     */
> >> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> >> -                ttm_bo_get(bo);
> >> - up_read(&vmf->vma->vm_mm->mmap_sem);
> >> -                (void) ttm_bo_wait_unreserved(bo);
> >> -                ttm_bo_put(bo);
> >> -            }
> >> -
> >> -            return VM_FAULT_RETRY;
> >> -        }
> >> -
> >> -        /*
> >> -         * If we'd want to change locking order to
> >> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> >> -         * instead of retrying the fault...
> >> -         */
> >
> > I think you should justify why the above code is removed, since the
> > comments actually outlines what to do if we change locking order.
> >
> > The code that's removed above is not for adjusting locking orders but
> > to decrease the mm latency by releasing the mmap_sem while waiting for
> > bo reserve which in turn might be waiting for GPU. At a minimum we
> > should have a separate patch with justification.
> >
> > Note that the caller here ensures locking progress by adjusting the
> > RETRY flags after a retry.

That would be patches 1&2 in this series.

> And this last sentence also means we still can remove the wu-mutex when
> the locking order is changed, since the chance of livelock is goes away.
> IIRC only a single RETRY spin is allowed by the mm core.

Christian already noticed that one too, I simply forgot, it's fixed in
v2 I have here.
-Daniel

> /Thomas
>
> >
> > /Thomas
> >
> >
> >> -        return VM_FAULT_NOPAGE;
> >> -    }
> >> +    dma_resv_lock(bo->base.resv, NULL);
> >>         /*
> >>        * Refuse to fault imported pages. This should be handled
> >> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> >> index 43c4929a2171..6b50e624e3e2 100644
> >> --- a/include/drm/ttm/ttm_bo_api.h
> >> +++ b/include/drm/ttm/ttm_bo_api.h
> >> @@ -765,7 +765,6 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev,
> >> struct file *filp,
> >>   int ttm_bo_swapout(struct ttm_bo_global *glob,
> >>               struct ttm_operation_ctx *ctx);
> >>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> >> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> >>     /**
> >>    * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> >
> >
>


--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
Daniel Vetter Aug. 21, 2019, 2:10 p.m. UTC | #10
On Wed, Aug 21, 2019 at 3:16 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
>
> On 8/20/19 4:53 PM, Daniel Vetter wrote:
> > With nouveau fixed all ttm-using drives have the correct nesting of
> > mmap_sem vs dma_resv, and we can just lock the buffer.
> >
> > Assuming I didn't screw up anything with my audit of course.
> >
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > Cc: Christian Koenig <christian.koenig@amd.com>
> > Cc: Huang Rui <ray.huang@amd.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> > Cc: Thomas Hellstrom <thellstrom@vmware.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >   include/drm/ttm/ttm_bo_api.h    |  1 -
> >   3 files changed, 1 insertion(+), 60 deletions(-)
> >
> >
> > +     dma_resv_lock(bo->base.resv, NULL);
>
> interruptible, or at least killable. (IIRC think killable is the right
> choice in fault code, even if TTM initially implemented interruptible to
> get reasonable Xorg "silken mouse" latency).

I think interruptible is fine. I chickend out of that for v1 because I
always mix up the return code for _lock_interruptibl() :-)

I'll add that for the next version too.
-Daniel
Thomas Hellström (VMware) Aug. 21, 2019, 2:27 p.m. UTC | #11
On 8/21/19 4:09 PM, Daniel Vetter wrote:
> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>
>>>> Assuming I didn't screw up anything with my audit of course.
>>>>
>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>> ---
>>>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>>    include/drm/ttm/ttm_bo_api.h    |  1 -
>>>>    3 files changed, 1 insertion(+), 60 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 20ff56f27aa4..a952dd624b06 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
>>>> *bdev)
>>>>            ;
>>>>    }
>>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>> -
>>>> -/**
>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
>>>> to become
>>>> - * unreserved
>>>> - *
>>>> - * @bo: Pointer to buffer
>>>> - */
>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>> -{
>>>> -    int ret;
>>>> -
>>>> -    /*
>>>> -     * In the absense of a wait_unlocked API,
>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>> -     * concurrent use of this function. Note that this use of
>>>> -     * bo::wu_mutex can go away if we change locking order to
>>>> -     * mmap_sem -> bo::reserve.
>>>> -     */
>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>> -    if (unlikely(ret != 0))
>>>> -        return -ERESTARTSYS;
>>>> -    if (!dma_resv_is_locked(bo->base.resv))
>>>> -        goto out_unlock;
>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>> -    if (ret == -EINTR)
>>>> -        ret = -ERESTARTSYS;
>>>> -    if (unlikely(ret != 0))
>>>> -        goto out_unlock;
>>>> -    dma_resv_unlock(bo->base.resv);
>>>> -
>>>> -out_unlock:
>>>> -    mutex_unlock(&bo->wu_mutex);
>>>> -    return ret;
>>>> -}
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> index 76eedb963693..505e1787aeea 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
>>>> vm_fault *vmf)
>>>>            &bdev->man[bo->mem.mem_type];
>>>>        struct vm_area_struct cvma;
>>>>    -    /*
>>>> -     * Work around locking order reversal in fault / nopfn
>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
>>>> -     * for reserve, and if it fails, retry the fault after waiting
>>>> -     * for the buffer to become unreserved.
>>>> -     */
>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>>> -                ttm_bo_get(bo);
>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
>>>> -                (void) ttm_bo_wait_unreserved(bo);
>>>> -                ttm_bo_put(bo);
>>>> -            }
>>>> -
>>>> -            return VM_FAULT_RETRY;
>>>> -        }
>>>> -
>>>> -        /*
>>>> -         * If we'd want to change locking order to
>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>> -         * instead of retrying the fault...
>>>> -         */
>>> I think you should justify why the above code is removed, since the
>>> comments actually outlines what to do if we change locking order.
>>>
>>> The code that's removed above is not for adjusting locking orders but
>>> to decrease the mm latency by releasing the mmap_sem while waiting for
>>> bo reserve which in turn might be waiting for GPU. At a minimum we
>>> should have a separate patch with justification.
>>>
>>> Note that the caller here ensures locking progress by adjusting the
>>> RETRY flags after a retry.
> That would be patches 1&2 in this series.
>
Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this 
patch should look along the lines of (based on an older tree) to 
implement the new locking-order mmap_sem->reservation,

but to keep the mm latency optimization using the RETRY functionality:


Thanks,
Thomas


diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 85f5bcbe0c76..68482c67b9f7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -125,30 +125,20 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
                 &bdev->man[bo->mem.mem_type];
         struct vm_area_struct cvma;
  
-       /*
-        * Work around locking order reversal in fault / nopfn
-        * between mmap_sem and bo_reserve: Perform a trylock operation
-        * for reserve, and if it fails, retry the fault after waiting
-        * for the buffer to become unreserved.
-        */
+       /* Avoid blocking on reservation with mmap_sem held, if possible */
         if (unlikely(!reservation_object_trylock(bo->base.resv))) {
-               if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
-                       if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
-                               ttm_bo_get(bo);
-                               up_read(&vmf->vma->vm_mm->mmap_sem);
-                               (void) ttm_bo_wait_unreserved(bo);
-                               ttm_bo_put(bo);
-                       }
+               if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
+                   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                       ttm_bo_get(bo);
+                       up_read(&vmf->vma->vm_mm->mmap_sem);
+                       (void) ttm_bo_wait_unreserved(bo);
+                       ttm_bo_put(bo);
  
                         return VM_FAULT_RETRY;
                 }
  
-               /*
-                * If we'd want to change locking order to
-                * mmap_sem -> bo::reserve, we'd use a blocking reserve here
-                * instead of retrying the fault...
-                */
-               return VM_FAULT_NOPAGE;
+               if (reservation_object_lock_interruptible(bo->base.resv, NULL))
+                       return VM_FAULT_NOPAGE;
         }
Thomas Hellström (VMware) Aug. 21, 2019, 2:30 p.m. UTC | #12
On 8/21/19 4:10 PM, Daniel Vetter wrote:
> On Wed, Aug 21, 2019 at 3:16 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>
>>> Assuming I didn't screw up anything with my audit of course.
>>>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Cc: Huang Rui <ray.huang@amd.com>
>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>    include/drm/ttm/ttm_bo_api.h    |  1 -
>>>    3 files changed, 1 insertion(+), 60 deletions(-)
>>>
>>>
>>> +     dma_resv_lock(bo->base.resv, NULL);
>> interruptible, or at least killable. (IIRC think killable is the right
>> choice in fault code, even if TTM initially implemented interruptible to
>> get reasonable Xorg "silken mouse" latency).
> I think interruptible is fine. I chickend out of that for v1 because I
> always mix up the return code for _lock_interruptibl() :-)

:). IIRC I think the in-kernel users of fault() were unhappy with 
interruptible.  (GUP?), but I guess it's better to use a bulk change at 
some point if necessary.

/Thomas

> I'll add that for the next version too.
> -Daniel
Daniel Vetter Aug. 21, 2019, 2:42 p.m. UTC | #13
On Wed, Aug 21, 2019 at 4:30 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
>
> On 8/21/19 4:10 PM, Daniel Vetter wrote:
> > On Wed, Aug 21, 2019 at 3:16 PM Thomas Hellström (VMware)
> > <thomas_os@shipmail.org> wrote:
> >> On 8/20/19 4:53 PM, Daniel Vetter wrote:
> >>> With nouveau fixed all ttm-using drives have the correct nesting of
> >>> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>>
> >>> Assuming I didn't screw up anything with my audit of course.
> >>>
> >>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>> Cc: Huang Rui <ray.huang@amd.com>
> >>> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >>> ---
> >>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>>    include/drm/ttm/ttm_bo_api.h    |  1 -
> >>>    3 files changed, 1 insertion(+), 60 deletions(-)
> >>>
> >>>
> >>> +     dma_resv_lock(bo->base.resv, NULL);
> >> interruptible, or at least killable. (IIRC think killable is the right
> >> choice in fault code, even if TTM initially implemented interruptible to
> >> get reasonable Xorg "silken mouse" latency).
> > I think interruptible is fine. I chickend out of that for v1 because I
> > always mix up the return code for _lock_interruptibl() :-)
>
> :). IIRC I think the in-kernel users of fault() were unhappy with
> interruptible.  (GUP?), but I guess it's better to use a bulk change at
> some point if necessary.

We've been using interruptible since forever, returning
VM_FAULT_NOPAGE to get the signal handled and re-run. Seems to not
have pissed off anyone thus far. I think if we do this I'll do it as a
follow-up.
-Daniel
Daniel Vetter Aug. 21, 2019, 2:47 p.m. UTC | #14
On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
> On 8/21/19 4:09 PM, Daniel Vetter wrote:
> > On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
> > <thomas_os@shipmail.org> wrote:
> >> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
> >>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
> >>>> With nouveau fixed all ttm-using drives have the correct nesting of
> >>>> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>>>
> >>>> Assuming I didn't screw up anything with my audit of course.
> >>>>
> >>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >>>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>>> Cc: Huang Rui <ray.huang@amd.com>
> >>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >>>> ---
> >>>>    drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>>>    drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>>>    include/drm/ttm/ttm_bo_api.h    |  1 -
> >>>>    3 files changed, 1 insertion(+), 60 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>> index 20ff56f27aa4..a952dd624b06 100644
> >>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
> >>>> *bdev)
> >>>>            ;
> >>>>    }
> >>>>    EXPORT_SYMBOL(ttm_bo_swapout_all);
> >>>> -
> >>>> -/**
> >>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
> >>>> to become
> >>>> - * unreserved
> >>>> - *
> >>>> - * @bo: Pointer to buffer
> >>>> - */
> >>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> >>>> -{
> >>>> -    int ret;
> >>>> -
> >>>> -    /*
> >>>> -     * In the absense of a wait_unlocked API,
> >>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
> >>>> -     * concurrent use of this function. Note that this use of
> >>>> -     * bo::wu_mutex can go away if we change locking order to
> >>>> -     * mmap_sem -> bo::reserve.
> >>>> -     */
> >>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
> >>>> -    if (unlikely(ret != 0))
> >>>> -        return -ERESTARTSYS;
> >>>> -    if (!dma_resv_is_locked(bo->base.resv))
> >>>> -        goto out_unlock;
> >>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> >>>> -    if (ret == -EINTR)
> >>>> -        ret = -ERESTARTSYS;
> >>>> -    if (unlikely(ret != 0))
> >>>> -        goto out_unlock;
> >>>> -    dma_resv_unlock(bo->base.resv);
> >>>> -
> >>>> -out_unlock:
> >>>> -    mutex_unlock(&bo->wu_mutex);
> >>>> -    return ret;
> >>>> -}
> >>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>> index 76eedb963693..505e1787aeea 100644
> >>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
> >>>> vm_fault *vmf)
> >>>>            &bdev->man[bo->mem.mem_type];
> >>>>        struct vm_area_struct cvma;
> >>>>    -    /*
> >>>> -     * Work around locking order reversal in fault / nopfn
> >>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
> >>>> -     * for reserve, and if it fails, retry the fault after waiting
> >>>> -     * for the buffer to become unreserved.
> >>>> -     */
> >>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> >>>> -                ttm_bo_get(bo);
> >>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
> >>>> -                (void) ttm_bo_wait_unreserved(bo);
> >>>> -                ttm_bo_put(bo);
> >>>> -            }
> >>>> -
> >>>> -            return VM_FAULT_RETRY;
> >>>> -        }
> >>>> -
> >>>> -        /*
> >>>> -         * If we'd want to change locking order to
> >>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> >>>> -         * instead of retrying the fault...
> >>>> -         */
> >>> I think you should justify why the above code is removed, since the
> >>> comments actually outlines what to do if we change locking order.
> >>>
> >>> The code that's removed above is not for adjusting locking orders but
> >>> to decrease the mm latency by releasing the mmap_sem while waiting for
> >>> bo reserve which in turn might be waiting for GPU. At a minimum we
> >>> should have a separate patch with justification.
> >>>
> >>> Note that the caller here ensures locking progress by adjusting the
> >>> RETRY flags after a retry.
> > That would be patches 1&2 in this series.
> >
> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
> patch should look along the lines of (based on an older tree) to
> implement the new locking-order mmap_sem->reservation,

Only nouveau was breaking was doing copy_*_user or get_user_pages
while holding dma_resv locks, no one else. So nothing else needed to
be changed. But patch 1 contains the full audit. I might have missed
something.

> but to keep the mm latency optimization using the RETRY functionality:

Still no idea why this is needed? All the comments here and the code
and history seem like they've been about the mmap_sem vs dma_resv
inversion between driver ioctls and fault handling here. Once that's
officially fixed there's no reason to play games here and retry loops
- previously that was necessary because the old ttm_bo_vm_fault had a
busy spin and that's definitely not nice. If it's needed I think it
should be a second patch on top, to keep this all clear. I had to
audit an enormous amount of code, I'd like to make sure I didn't miss
anything before we start to make this super fancy again. Further
patches on top is obviously all fine with me.
-Daniel

> Thanks,
> Thomas
>
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 85f5bcbe0c76..68482c67b9f7 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -125,30 +125,20 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>                  &bdev->man[bo->mem.mem_type];
>          struct vm_area_struct cvma;
>
> -       /*
> -        * Work around locking order reversal in fault / nopfn
> -        * between mmap_sem and bo_reserve: Perform a trylock operation
> -        * for reserve, and if it fails, retry the fault after waiting
> -        * for the buffer to become unreserved.
> -        */
> +       /* Avoid blocking on reservation with mmap_sem held, if possible */
>          if (unlikely(!reservation_object_trylock(bo->base.resv))) {
> -               if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> -                       if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> -                               ttm_bo_get(bo);
> -                               up_read(&vmf->vma->vm_mm->mmap_sem);
> -                               (void) ttm_bo_wait_unreserved(bo);
> -                               ttm_bo_put(bo);
> -                       }
> +               if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
> +                   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> +                       ttm_bo_get(bo);
> +                       up_read(&vmf->vma->vm_mm->mmap_sem);
> +                       (void) ttm_bo_wait_unreserved(bo);
> +                       ttm_bo_put(bo);
>
>                          return VM_FAULT_RETRY;
>                  }
>
> -               /*
> -                * If we'd want to change locking order to
> -                * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> -                * instead of retrying the fault...
> -                */
> -               return VM_FAULT_NOPAGE;
> +               if (reservation_object_lock_interruptible(bo->base.resv, NULL))
> +                       return VM_FAULT_NOPAGE;
>          }
>
>
Thomas Hellström (VMware) Aug. 21, 2019, 3:03 p.m. UTC | #15
On 8/21/19 4:47 PM, Daniel Vetter wrote:
> On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/21/19 4:09 PM, Daniel Vetter wrote:
>>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
>>> <thomas_os@shipmail.org> wrote:
>>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
>>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>>>
>>>>>> Assuming I didn't screw up anything with my audit of course.
>>>>>>
>>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>>>>     drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>>>>     include/drm/ttm/ttm_bo_api.h    |  1 -
>>>>>>     3 files changed, 1 insertion(+), 60 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> index 20ff56f27aa4..a952dd624b06 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
>>>>>> *bdev)
>>>>>>             ;
>>>>>>     }
>>>>>>     EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>>>> -
>>>>>> -/**
>>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
>>>>>> to become
>>>>>> - * unreserved
>>>>>> - *
>>>>>> - * @bo: Pointer to buffer
>>>>>> - */
>>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>>>> -{
>>>>>> -    int ret;
>>>>>> -
>>>>>> -    /*
>>>>>> -     * In the absense of a wait_unlocked API,
>>>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>>>> -     * concurrent use of this function. Note that this use of
>>>>>> -     * bo::wu_mutex can go away if we change locking order to
>>>>>> -     * mmap_sem -> bo::reserve.
>>>>>> -     */
>>>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>>>> -    if (unlikely(ret != 0))
>>>>>> -        return -ERESTARTSYS;
>>>>>> -    if (!dma_resv_is_locked(bo->base.resv))
>>>>>> -        goto out_unlock;
>>>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>>>> -    if (ret == -EINTR)
>>>>>> -        ret = -ERESTARTSYS;
>>>>>> -    if (unlikely(ret != 0))
>>>>>> -        goto out_unlock;
>>>>>> -    dma_resv_unlock(bo->base.resv);
>>>>>> -
>>>>>> -out_unlock:
>>>>>> -    mutex_unlock(&bo->wu_mutex);
>>>>>> -    return ret;
>>>>>> -}
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>> index 76eedb963693..505e1787aeea 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
>>>>>> vm_fault *vmf)
>>>>>>             &bdev->man[bo->mem.mem_type];
>>>>>>         struct vm_area_struct cvma;
>>>>>>     -    /*
>>>>>> -     * Work around locking order reversal in fault / nopfn
>>>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
>>>>>> -     * for reserve, and if it fails, retry the fault after waiting
>>>>>> -     * for the buffer to become unreserved.
>>>>>> -     */
>>>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>>>>> -                ttm_bo_get(bo);
>>>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
>>>>>> -                (void) ttm_bo_wait_unreserved(bo);
>>>>>> -                ttm_bo_put(bo);
>>>>>> -            }
>>>>>> -
>>>>>> -            return VM_FAULT_RETRY;
>>>>>> -        }
>>>>>> -
>>>>>> -        /*
>>>>>> -         * If we'd want to change locking order to
>>>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>>>> -         * instead of retrying the fault...
>>>>>> -         */
>>>>> I think you should justify why the above code is removed, since the
>>>>> comments actually outlines what to do if we change locking order.
>>>>>
>>>>> The code that's removed above is not for adjusting locking orders but
>>>>> to decrease the mm latency by releasing the mmap_sem while waiting for
>>>>> bo reserve which in turn might be waiting for GPU. At a minimum we
>>>>> should have a separate patch with justification.
>>>>>
>>>>> Note that the caller here ensures locking progress by adjusting the
>>>>> RETRY flags after a retry.
>>> That would be patches 1&2 in this series.
>>>
>> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
>> patch should look along the lines of (based on an older tree) to
>> implement the new locking-order mmap_sem->reservation,
> Only nouveau was breaking was doing copy_*_user or get_user_pages
> while holding dma_resv locks, no one else. So nothing else needed to
> be changed. But patch 1 contains the full audit. I might have missed
> something.
>
>> but to keep the mm latency optimization using the RETRY functionality:
> Still no idea why this is needed? All the comments here and the code
> and history seem like they've been about the mmap_sem vs dma_resv
> inversion between driver ioctls and fault handling here. Once that's
> officially fixed there's no reason to play games here and retry loops
> - previously that was necessary because the old ttm_bo_vm_fault had a
> busy spin and that's definitely not nice. If it's needed I think it
> should be a second patch on top, to keep this all clear. I had to
> audit an enormous amount of code, I'd like to make sure I didn't miss
> anything before we start to make this super fancy again. Further
> patches on top is obviously all fine with me.
> -Daniel

Yes, but there are two different things you remove here. One is the 
workaround for the locking reversal, which is obviously correct.

One is TTM's implementation of the mmap_sem latency optimization, which 
looks like an oversight.

That optimization is why the VM_FAULT_RETRY functionality was added to 
mm in the first place and is intended to be used when drivers expect a 
substantial sleep to avoid keeping the pretty globalish mmap_sem held 
while that sleep is taking place. We do exactly the same while waiting 
for move-fences (ttm_bo_vm_fault_idle) and other drivers that expect 
long waits in the fault handler do the same.

To avoid this accidently happening there was even this comment:

         /*
          * If we'd want to change locking order to
          * mmap_sem -> bo::reserve, we'd use a blocking reserve here
          * instead of retrying the fault...
          */
         return VM_FAULT_NOPAGE;

And I do really think we should avoid accidently removing this just to 
re-add it in a later patch, particularly when I pointed it out at review 
time.

/Thomas
Koenig, Christian Aug. 21, 2019, 3:07 p.m. UTC | #16
Am 21.08.19 um 16:47 schrieb Daniel Vetter:
> On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/21/19 4:09 PM, Daniel Vetter wrote:
>>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
>>> <thomas_os@shipmail.org> wrote:
>>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
>>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>>>> [SNIP]
>> but to keep the mm latency optimization using the RETRY functionality:
> Still no idea why this is needed? All the comments here and the code
> and history seem like they've been about the mmap_sem vs dma_resv
> inversion between driver ioctls and fault handling here. Once that's
> officially fixed there's no reason to play games here and retry loops
> - previously that was necessary because the old ttm_bo_vm_fault had a
> busy spin and that's definitely not nice. If it's needed I think it
> should be a second patch on top, to keep this all clear. I had to
> audit an enormous amount of code, I'd like to make sure I didn't miss
> anything before we start to make this super fancy again. Further
> patches on top is obviously all fine with me.

I think this is just an optimization to not hold the mmap_sem while 
waiting for the dma_resv lock.

I agree that it shouldn't be necessary, but maybe it's a good idea for 
performance. I'm also OK with removing it, cause I'm not sure if it's 
worth it.

But Thomas noted correctly that we should probably do it in a separate 
patch so that when somebody points out "Hey my system is slower now!" 
he's able to bisect to the change.

Christian.

> -Daniel
>
>> Thanks,
>> Thomas
>>
Daniel Vetter Aug. 21, 2019, 3:14 p.m. UTC | #17
On Wed, Aug 21, 2019 at 5:03 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
> On 8/21/19 4:47 PM, Daniel Vetter wrote:
> > On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
> > <thomas_os@shipmail.org> wrote:
> >> On 8/21/19 4:09 PM, Daniel Vetter wrote:
> >>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
> >>> <thomas_os@shipmail.org> wrote:
> >>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
> >>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
> >>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
> >>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>>>>>
> >>>>>> Assuming I didn't screw up anything with my audit of course.
> >>>>>>
> >>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>>>>> Cc: Huang Rui <ray.huang@amd.com>
> >>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >>>>>> ---
> >>>>>>     drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>>>>>     drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>>>>>     include/drm/ttm/ttm_bo_api.h    |  1 -
> >>>>>>     3 files changed, 1 insertion(+), 60 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>> index 20ff56f27aa4..a952dd624b06 100644
> >>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
> >>>>>> *bdev)
> >>>>>>             ;
> >>>>>>     }
> >>>>>>     EXPORT_SYMBOL(ttm_bo_swapout_all);
> >>>>>> -
> >>>>>> -/**
> >>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
> >>>>>> to become
> >>>>>> - * unreserved
> >>>>>> - *
> >>>>>> - * @bo: Pointer to buffer
> >>>>>> - */
> >>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> >>>>>> -{
> >>>>>> -    int ret;
> >>>>>> -
> >>>>>> -    /*
> >>>>>> -     * In the absense of a wait_unlocked API,
> >>>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
> >>>>>> -     * concurrent use of this function. Note that this use of
> >>>>>> -     * bo::wu_mutex can go away if we change locking order to
> >>>>>> -     * mmap_sem -> bo::reserve.
> >>>>>> -     */
> >>>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
> >>>>>> -    if (unlikely(ret != 0))
> >>>>>> -        return -ERESTARTSYS;
> >>>>>> -    if (!dma_resv_is_locked(bo->base.resv))
> >>>>>> -        goto out_unlock;
> >>>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> >>>>>> -    if (ret == -EINTR)
> >>>>>> -        ret = -ERESTARTSYS;
> >>>>>> -    if (unlikely(ret != 0))
> >>>>>> -        goto out_unlock;
> >>>>>> -    dma_resv_unlock(bo->base.resv);
> >>>>>> -
> >>>>>> -out_unlock:
> >>>>>> -    mutex_unlock(&bo->wu_mutex);
> >>>>>> -    return ret;
> >>>>>> -}
> >>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>> index 76eedb963693..505e1787aeea 100644
> >>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
> >>>>>> vm_fault *vmf)
> >>>>>>             &bdev->man[bo->mem.mem_type];
> >>>>>>         struct vm_area_struct cvma;
> >>>>>>     -    /*
> >>>>>> -     * Work around locking order reversal in fault / nopfn
> >>>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
> >>>>>> -     * for reserve, and if it fails, retry the fault after waiting
> >>>>>> -     * for the buffer to become unreserved.
> >>>>>> -     */
> >>>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >>>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >>>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> >>>>>> -                ttm_bo_get(bo);
> >>>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
> >>>>>> -                (void) ttm_bo_wait_unreserved(bo);
> >>>>>> -                ttm_bo_put(bo);
> >>>>>> -            }
> >>>>>> -
> >>>>>> -            return VM_FAULT_RETRY;
> >>>>>> -        }
> >>>>>> -
> >>>>>> -        /*
> >>>>>> -         * If we'd want to change locking order to
> >>>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> >>>>>> -         * instead of retrying the fault...
> >>>>>> -         */
> >>>>> I think you should justify why the above code is removed, since the
> >>>>> comments actually outlines what to do if we change locking order.
> >>>>>
> >>>>> The code that's removed above is not for adjusting locking orders but
> >>>>> to decrease the mm latency by releasing the mmap_sem while waiting for
> >>>>> bo reserve which in turn might be waiting for GPU. At a minimum we
> >>>>> should have a separate patch with justification.
> >>>>>
> >>>>> Note that the caller here ensures locking progress by adjusting the
> >>>>> RETRY flags after a retry.
> >>> That would be patches 1&2 in this series.
> >>>
> >> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
> >> patch should look along the lines of (based on an older tree) to
> >> implement the new locking-order mmap_sem->reservation,
> > Only nouveau was breaking was doing copy_*_user or get_user_pages
> > while holding dma_resv locks, no one else. So nothing else needed to
> > be changed. But patch 1 contains the full audit. I might have missed
> > something.
> >
> >> but to keep the mm latency optimization using the RETRY functionality:
> > Still no idea why this is needed? All the comments here and the code
> > and history seem like they've been about the mmap_sem vs dma_resv
> > inversion between driver ioctls and fault handling here. Once that's
> > officially fixed there's no reason to play games here and retry loops
> > - previously that was necessary because the old ttm_bo_vm_fault had a
> > busy spin and that's definitely not nice. If it's needed I think it
> > should be a second patch on top, to keep this all clear. I had to
> > audit an enormous amount of code, I'd like to make sure I didn't miss
> > anything before we start to make this super fancy again. Further
> > patches on top is obviously all fine with me.
> > -Daniel
>
> Yes, but there are two different things you remove here. One is the
> workaround for the locking reversal, which is obviously correct.
>
> One is TTM's implementation of the mmap_sem latency optimization, which
> looks like an oversight.
>
> That optimization is why the VM_FAULT_RETRY functionality was added to
> mm in the first place and is intended to be used when drivers expect a
> substantial sleep to avoid keeping the pretty globalish mmap_sem held
> while that sleep is taking place. We do exactly the same while waiting
> for move-fences (ttm_bo_vm_fault_idle) and other drivers that expect
> long waits in the fault handler do the same.

Hm, are we guaranteed that core mm will only call us once with
ALLOW_RETRY? Just to make sure that we're not live-locking here. I'd
also like to get rid of the wu_mutex, that just looks really strange
(and I thought it was to duct-tape over the inversion, not as an
optimization). If the livelock due to locking inversion is gone I have
no idea anymore why we even needs the wu_mutex.

> To avoid this accidently happening there was even this comment:
>
>          /*
>           * If we'd want to change locking order to
>           * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>           * instead of retrying the fault...
>           */
>          return VM_FAULT_NOPAGE;
>
> And I do really think we should avoid accidently removing this just to
> re-add it in a later patch, particularly when I pointed it out at review
> time.

Yeah I read that, but I didn't read this comment the way you now explained it.
-Daniel




--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
Thomas Hellström (VMware) Aug. 21, 2019, 3:19 p.m. UTC | #18
On 8/21/19 5:14 PM, Daniel Vetter wrote:
> On Wed, Aug 21, 2019 at 5:03 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/21/19 4:47 PM, Daniel Vetter wrote:
>>> On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
>>> <thomas_os@shipmail.org> wrote:
>>>> On 8/21/19 4:09 PM, Daniel Vetter wrote:
>>>>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
>>>>> <thomas_os@shipmail.org> wrote:
>>>>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
>>>>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>>>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>>>>>
>>>>>>>> Assuming I didn't screw up anything with my audit of course.
>>>>>>>>
>>>>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>>>> ---
>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>>>>>>      include/drm/ttm/ttm_bo_api.h    |  1 -
>>>>>>>>      3 files changed, 1 insertion(+), 60 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> index 20ff56f27aa4..a952dd624b06 100644
>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
>>>>>>>> *bdev)
>>>>>>>>              ;
>>>>>>>>      }
>>>>>>>>      EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>>>>>> -
>>>>>>>> -/**
>>>>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
>>>>>>>> to become
>>>>>>>> - * unreserved
>>>>>>>> - *
>>>>>>>> - * @bo: Pointer to buffer
>>>>>>>> - */
>>>>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>>>>>> -{
>>>>>>>> -    int ret;
>>>>>>>> -
>>>>>>>> -    /*
>>>>>>>> -     * In the absense of a wait_unlocked API,
>>>>>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>>>>>> -     * concurrent use of this function. Note that this use of
>>>>>>>> -     * bo::wu_mutex can go away if we change locking order to
>>>>>>>> -     * mmap_sem -> bo::reserve.
>>>>>>>> -     */
>>>>>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>>>>>> -    if (unlikely(ret != 0))
>>>>>>>> -        return -ERESTARTSYS;
>>>>>>>> -    if (!dma_resv_is_locked(bo->base.resv))
>>>>>>>> -        goto out_unlock;
>>>>>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>>>>>> -    if (ret == -EINTR)
>>>>>>>> -        ret = -ERESTARTSYS;
>>>>>>>> -    if (unlikely(ret != 0))
>>>>>>>> -        goto out_unlock;
>>>>>>>> -    dma_resv_unlock(bo->base.resv);
>>>>>>>> -
>>>>>>>> -out_unlock:
>>>>>>>> -    mutex_unlock(&bo->wu_mutex);
>>>>>>>> -    return ret;
>>>>>>>> -}
>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>> index 76eedb963693..505e1787aeea 100644
>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
>>>>>>>> vm_fault *vmf)
>>>>>>>>              &bdev->man[bo->mem.mem_type];
>>>>>>>>          struct vm_area_struct cvma;
>>>>>>>>      -    /*
>>>>>>>> -     * Work around locking order reversal in fault / nopfn
>>>>>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
>>>>>>>> -     * for reserve, and if it fails, retry the fault after waiting
>>>>>>>> -     * for the buffer to become unreserved.
>>>>>>>> -     */
>>>>>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>>>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>>>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>>>>>>> -                ttm_bo_get(bo);
>>>>>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
>>>>>>>> -                (void) ttm_bo_wait_unreserved(bo);
>>>>>>>> -                ttm_bo_put(bo);
>>>>>>>> -            }
>>>>>>>> -
>>>>>>>> -            return VM_FAULT_RETRY;
>>>>>>>> -        }
>>>>>>>> -
>>>>>>>> -        /*
>>>>>>>> -         * If we'd want to change locking order to
>>>>>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>>>>>> -         * instead of retrying the fault...
>>>>>>>> -         */
>>>>>>> I think you should justify why the above code is removed, since the
>>>>>>> comments actually outlines what to do if we change locking order.
>>>>>>>
>>>>>>> The code that's removed above is not for adjusting locking orders but
>>>>>>> to decrease the mm latency by releasing the mmap_sem while waiting for
>>>>>>> bo reserve which in turn might be waiting for GPU. At a minimum we
>>>>>>> should have a separate patch with justification.
>>>>>>>
>>>>>>> Note that the caller here ensures locking progress by adjusting the
>>>>>>> RETRY flags after a retry.
>>>>> That would be patches 1&2 in this series.
>>>>>
>>>> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
>>>> patch should look along the lines of (based on an older tree) to
>>>> implement the new locking-order mmap_sem->reservation,
>>> Only nouveau was breaking was doing copy_*_user or get_user_pages
>>> while holding dma_resv locks, no one else. So nothing else needed to
>>> be changed. But patch 1 contains the full audit. I might have missed
>>> something.
>>>
>>>> but to keep the mm latency optimization using the RETRY functionality:
>>> Still no idea why this is needed? All the comments here and the code
>>> and history seem like they've been about the mmap_sem vs dma_resv
>>> inversion between driver ioctls and fault handling here. Once that's
>>> officially fixed there's no reason to play games here and retry loops
>>> - previously that was necessary because the old ttm_bo_vm_fault had a
>>> busy spin and that's definitely not nice. If it's needed I think it
>>> should be a second patch on top, to keep this all clear. I had to
>>> audit an enormous amount of code, I'd like to make sure I didn't miss
>>> anything before we start to make this super fancy again. Further
>>> patches on top is obviously all fine with me.
>>> -Daniel
>> Yes, but there are two different things you remove here. One is the
>> workaround for the locking reversal, which is obviously correct.
>>
>> One is TTM's implementation of the mmap_sem latency optimization, which
>> looks like an oversight.
>>
>> That optimization is why the VM_FAULT_RETRY functionality was added to
>> mm in the first place and is intended to be used when drivers expect a
>> substantial sleep to avoid keeping the pretty globalish mmap_sem held
>> while that sleep is taking place. We do exactly the same while waiting
>> for move-fences (ttm_bo_vm_fault_idle) and other drivers that expect
>> long waits in the fault handler do the same.
> Hm, are we guaranteed that core mm will only call us once with
> ALLOW_RETRY?

Last time I looked in the implementation, yes. The ALLOW_RETRY was there 
to specifically
allow making progress in the locking.

>   Just to make sure that we're not live-locking here. I'd
> also like to get rid of the wu_mutex, that just looks really strange
> (and I thought it was to duct-tape over the inversion, not as an
> optimization). If the livelock due to locking inversion is gone I have
> no idea anymore why we even needs the wu_mutex.

Yes, my interpretation of this is that wu_mutex definitely can be ditched.

/Thomas
Daniel Vetter Aug. 21, 2019, 3:22 p.m. UTC | #19
On Wed, Aug 21, 2019 at 5:19 PM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
> On 8/21/19 5:14 PM, Daniel Vetter wrote:
> > On Wed, Aug 21, 2019 at 5:03 PM Thomas Hellström (VMware)
> > <thomas_os@shipmail.org> wrote:
> >> On 8/21/19 4:47 PM, Daniel Vetter wrote:
> >>> On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
> >>> <thomas_os@shipmail.org> wrote:
> >>>> On 8/21/19 4:09 PM, Daniel Vetter wrote:
> >>>>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
> >>>>> <thomas_os@shipmail.org> wrote:
> >>>>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
> >>>>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
> >>>>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
> >>>>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
> >>>>>>>>
> >>>>>>>> Assuming I didn't screw up anything with my audit of course.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> >>>>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
> >>>>>>>> Cc: Huang Rui <ray.huang@amd.com>
> >>>>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
> >>>>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
> >>>>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> >>>>>>>> ---
> >>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
> >>>>>>>>      drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
> >>>>>>>>      include/drm/ttm/ttm_bo_api.h    |  1 -
> >>>>>>>>      3 files changed, 1 insertion(+), 60 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>>>> index 20ff56f27aa4..a952dd624b06 100644
> >>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >>>>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
> >>>>>>>> *bdev)
> >>>>>>>>              ;
> >>>>>>>>      }
> >>>>>>>>      EXPORT_SYMBOL(ttm_bo_swapout_all);
> >>>>>>>> -
> >>>>>>>> -/**
> >>>>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
> >>>>>>>> to become
> >>>>>>>> - * unreserved
> >>>>>>>> - *
> >>>>>>>> - * @bo: Pointer to buffer
> >>>>>>>> - */
> >>>>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> >>>>>>>> -{
> >>>>>>>> -    int ret;
> >>>>>>>> -
> >>>>>>>> -    /*
> >>>>>>>> -     * In the absense of a wait_unlocked API,
> >>>>>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
> >>>>>>>> -     * concurrent use of this function. Note that this use of
> >>>>>>>> -     * bo::wu_mutex can go away if we change locking order to
> >>>>>>>> -     * mmap_sem -> bo::reserve.
> >>>>>>>> -     */
> >>>>>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
> >>>>>>>> -    if (unlikely(ret != 0))
> >>>>>>>> -        return -ERESTARTSYS;
> >>>>>>>> -    if (!dma_resv_is_locked(bo->base.resv))
> >>>>>>>> -        goto out_unlock;
> >>>>>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
> >>>>>>>> -    if (ret == -EINTR)
> >>>>>>>> -        ret = -ERESTARTSYS;
> >>>>>>>> -    if (unlikely(ret != 0))
> >>>>>>>> -        goto out_unlock;
> >>>>>>>> -    dma_resv_unlock(bo->base.resv);
> >>>>>>>> -
> >>>>>>>> -out_unlock:
> >>>>>>>> -    mutex_unlock(&bo->wu_mutex);
> >>>>>>>> -    return ret;
> >>>>>>>> -}
> >>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>>>> index 76eedb963693..505e1787aeea 100644
> >>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> >>>>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
> >>>>>>>> vm_fault *vmf)
> >>>>>>>>              &bdev->man[bo->mem.mem_type];
> >>>>>>>>          struct vm_area_struct cvma;
> >>>>>>>>      -    /*
> >>>>>>>> -     * Work around locking order reversal in fault / nopfn
> >>>>>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
> >>>>>>>> -     * for reserve, and if it fails, retry the fault after waiting
> >>>>>>>> -     * for the buffer to become unreserved.
> >>>>>>>> -     */
> >>>>>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
> >>>>>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> >>>>>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> >>>>>>>> -                ttm_bo_get(bo);
> >>>>>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
> >>>>>>>> -                (void) ttm_bo_wait_unreserved(bo);
> >>>>>>>> -                ttm_bo_put(bo);
> >>>>>>>> -            }
> >>>>>>>> -
> >>>>>>>> -            return VM_FAULT_RETRY;
> >>>>>>>> -        }
> >>>>>>>> -
> >>>>>>>> -        /*
> >>>>>>>> -         * If we'd want to change locking order to
> >>>>>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> >>>>>>>> -         * instead of retrying the fault...
> >>>>>>>> -         */
> >>>>>>> I think you should justify why the above code is removed, since the
> >>>>>>> comments actually outlines what to do if we change locking order.
> >>>>>>>
> >>>>>>> The code that's removed above is not for adjusting locking orders but
> >>>>>>> to decrease the mm latency by releasing the mmap_sem while waiting for
> >>>>>>> bo reserve which in turn might be waiting for GPU. At a minimum we
> >>>>>>> should have a separate patch with justification.
> >>>>>>>
> >>>>>>> Note that the caller here ensures locking progress by adjusting the
> >>>>>>> RETRY flags after a retry.
> >>>>> That would be patches 1&2 in this series.
> >>>>>
> >>>> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
> >>>> patch should look along the lines of (based on an older tree) to
> >>>> implement the new locking-order mmap_sem->reservation,
> >>> Only nouveau was breaking was doing copy_*_user or get_user_pages
> >>> while holding dma_resv locks, no one else. So nothing else needed to
> >>> be changed. But patch 1 contains the full audit. I might have missed
> >>> something.
> >>>
> >>>> but to keep the mm latency optimization using the RETRY functionality:
> >>> Still no idea why this is needed? All the comments here and the code
> >>> and history seem like they've been about the mmap_sem vs dma_resv
> >>> inversion between driver ioctls and fault handling here. Once that's
> >>> officially fixed there's no reason to play games here and retry loops
> >>> - previously that was necessary because the old ttm_bo_vm_fault had a
> >>> busy spin and that's definitely not nice. If it's needed I think it
> >>> should be a second patch on top, to keep this all clear. I had to
> >>> audit an enormous amount of code, I'd like to make sure I didn't miss
> >>> anything before we start to make this super fancy again. Further
> >>> patches on top is obviously all fine with me.
> >>> -Daniel
> >> Yes, but there are two different things you remove here. One is the
> >> workaround for the locking reversal, which is obviously correct.
> >>
> >> One is TTM's implementation of the mmap_sem latency optimization, which
> >> looks like an oversight.
> >>
> >> That optimization is why the VM_FAULT_RETRY functionality was added to
> >> mm in the first place and is intended to be used when drivers expect a
> >> substantial sleep to avoid keeping the pretty globalish mmap_sem held
> >> while that sleep is taking place. We do exactly the same while waiting
> >> for move-fences (ttm_bo_vm_fault_idle) and other drivers that expect
> >> long waits in the fault handler do the same.
> > Hm, are we guaranteed that core mm will only call us once with
> > ALLOW_RETRY?
>
> Last time I looked in the implementation, yes. The ALLOW_RETRY was there
> to specifically
> allow making progress in the locking.
>
> >   Just to make sure that we're not live-locking here. I'd
> > also like to get rid of the wu_mutex, that just looks really strange
> > (and I thought it was to duct-tape over the inversion, not as an
> > optimization). If the livelock due to locking inversion is gone I have
> > no idea anymore why we even needs the wu_mutex.
>
> Yes, my interpretation of this is that wu_mutex definitely can be ditched.

Ok I'll respin and just do normal interruptible locks, keeping the
mmap_sem-less path. I think perfectly ok to leave the optimization in,
as long as we can remove the wu_mutex.

btw r-b/t-b on patch 1 from vmwgfx would be very much appreciated.
That one is the real trick in this series here I think.
-Daniel
Thomas Hellström (VMware) Aug. 21, 2019, 3:34 p.m. UTC | #20
On 8/21/19 5:22 PM, Daniel Vetter wrote:
> On Wed, Aug 21, 2019 at 5:19 PM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> On 8/21/19 5:14 PM, Daniel Vetter wrote:
>>> On Wed, Aug 21, 2019 at 5:03 PM Thomas Hellström (VMware)
>>> <thomas_os@shipmail.org> wrote:
>>>> On 8/21/19 4:47 PM, Daniel Vetter wrote:
>>>>> On Wed, Aug 21, 2019 at 4:27 PM Thomas Hellström (VMware)
>>>>> <thomas_os@shipmail.org> wrote:
>>>>>> On 8/21/19 4:09 PM, Daniel Vetter wrote:
>>>>>>> On Wed, Aug 21, 2019 at 2:47 PM Thomas Hellström (VMware)
>>>>>>> <thomas_os@shipmail.org> wrote:
>>>>>>>> On 8/21/19 2:40 PM, Thomas Hellström (VMware) wrote:
>>>>>>>>> On 8/20/19 4:53 PM, Daniel Vetter wrote:
>>>>>>>>>> With nouveau fixed all ttm-using drives have the correct nesting of
>>>>>>>>>> mmap_sem vs dma_resv, and we can just lock the buffer.
>>>>>>>>>>
>>>>>>>>>> Assuming I didn't screw up anything with my audit of course.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>>>>>>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>>>>>>>> Cc: Huang Rui <ray.huang@amd.com>
>>>>>>>>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>>>>>>>>> Cc: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
>>>>>>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>>>>>> ---
>>>>>>>>>>       drivers/gpu/drm/ttm/ttm_bo.c    | 34 ---------------------------------
>>>>>>>>>>       drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 +------------------------
>>>>>>>>>>       include/drm/ttm/ttm_bo_api.h    |  1 -
>>>>>>>>>>       3 files changed, 1 insertion(+), 60 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> index 20ff56f27aa4..a952dd624b06 100644
>>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> @@ -1954,37 +1954,3 @@ void ttm_bo_swapout_all(struct ttm_bo_device
>>>>>>>>>> *bdev)
>>>>>>>>>>               ;
>>>>>>>>>>       }
>>>>>>>>>>       EXPORT_SYMBOL(ttm_bo_swapout_all);
>>>>>>>>>> -
>>>>>>>>>> -/**
>>>>>>>>>> - * ttm_bo_wait_unreserved - interruptible wait for a buffer object
>>>>>>>>>> to become
>>>>>>>>>> - * unreserved
>>>>>>>>>> - *
>>>>>>>>>> - * @bo: Pointer to buffer
>>>>>>>>>> - */
>>>>>>>>>> -int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
>>>>>>>>>> -{
>>>>>>>>>> -    int ret;
>>>>>>>>>> -
>>>>>>>>>> -    /*
>>>>>>>>>> -     * In the absense of a wait_unlocked API,
>>>>>>>>>> -     * Use the bo::wu_mutex to avoid triggering livelocks due to
>>>>>>>>>> -     * concurrent use of this function. Note that this use of
>>>>>>>>>> -     * bo::wu_mutex can go away if we change locking order to
>>>>>>>>>> -     * mmap_sem -> bo::reserve.
>>>>>>>>>> -     */
>>>>>>>>>> -    ret = mutex_lock_interruptible(&bo->wu_mutex);
>>>>>>>>>> -    if (unlikely(ret != 0))
>>>>>>>>>> -        return -ERESTARTSYS;
>>>>>>>>>> -    if (!dma_resv_is_locked(bo->base.resv))
>>>>>>>>>> -        goto out_unlock;
>>>>>>>>>> -    ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
>>>>>>>>>> -    if (ret == -EINTR)
>>>>>>>>>> -        ret = -ERESTARTSYS;
>>>>>>>>>> -    if (unlikely(ret != 0))
>>>>>>>>>> -        goto out_unlock;
>>>>>>>>>> -    dma_resv_unlock(bo->base.resv);
>>>>>>>>>> -
>>>>>>>>>> -out_unlock:
>>>>>>>>>> -    mutex_unlock(&bo->wu_mutex);
>>>>>>>>>> -    return ret;
>>>>>>>>>> -}
>>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>>>> index 76eedb963693..505e1787aeea 100644
>>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>>>>>>>>> @@ -125,31 +125,7 @@ static vm_fault_t ttm_bo_vm_fault(struct
>>>>>>>>>> vm_fault *vmf)
>>>>>>>>>>               &bdev->man[bo->mem.mem_type];
>>>>>>>>>>           struct vm_area_struct cvma;
>>>>>>>>>>       -    /*
>>>>>>>>>> -     * Work around locking order reversal in fault / nopfn
>>>>>>>>>> -     * between mmap_sem and bo_reserve: Perform a trylock operation
>>>>>>>>>> -     * for reserve, and if it fails, retry the fault after waiting
>>>>>>>>>> -     * for the buffer to become unreserved.
>>>>>>>>>> -     */
>>>>>>>>>> -    if (unlikely(!dma_resv_trylock(bo->base.resv))) {
>>>>>>>>>> -        if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>>>>>>>>>> -            if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
>>>>>>>>>> -                ttm_bo_get(bo);
>>>>>>>>>> - up_read(&vmf->vma->vm_mm->mmap_sem);
>>>>>>>>>> -                (void) ttm_bo_wait_unreserved(bo);
>>>>>>>>>> -                ttm_bo_put(bo);
>>>>>>>>>> -            }
>>>>>>>>>> -
>>>>>>>>>> -            return VM_FAULT_RETRY;
>>>>>>>>>> -        }
>>>>>>>>>> -
>>>>>>>>>> -        /*
>>>>>>>>>> -         * If we'd want to change locking order to
>>>>>>>>>> -         * mmap_sem -> bo::reserve, we'd use a blocking reserve here
>>>>>>>>>> -         * instead of retrying the fault...
>>>>>>>>>> -         */
>>>>>>>>> I think you should justify why the above code is removed, since the
>>>>>>>>> comments actually outlines what to do if we change locking order.
>>>>>>>>>
>>>>>>>>> The code that's removed above is not for adjusting locking orders but
>>>>>>>>> to decrease the mm latency by releasing the mmap_sem while waiting for
>>>>>>>>> bo reserve which in turn might be waiting for GPU. At a minimum we
>>>>>>>>> should have a separate patch with justification.
>>>>>>>>>
>>>>>>>>> Note that the caller here ensures locking progress by adjusting the
>>>>>>>>> RETRY flags after a retry.
>>>>>>> That would be patches 1&2 in this series.
>>>>>>>
>>>>>> Hmm? Those seem to touch only dma-buf and nouveau not ttm?  I mean this
>>>>>> patch should look along the lines of (based on an older tree) to
>>>>>> implement the new locking-order mmap_sem->reservation,
>>>>> Only nouveau was breaking was doing copy_*_user or get_user_pages
>>>>> while holding dma_resv locks, no one else. So nothing else needed to
>>>>> be changed. But patch 1 contains the full audit. I might have missed
>>>>> something.
>>>>>
>>>>>> but to keep the mm latency optimization using the RETRY functionality:
>>>>> Still no idea why this is needed? All the comments here and the code
>>>>> and history seem like they've been about the mmap_sem vs dma_resv
>>>>> inversion between driver ioctls and fault handling here. Once that's
>>>>> officially fixed there's no reason to play games here and retry loops
>>>>> - previously that was necessary because the old ttm_bo_vm_fault had a
>>>>> busy spin and that's definitely not nice. If it's needed I think it
>>>>> should be a second patch on top, to keep this all clear. I had to
>>>>> audit an enormous amount of code, I'd like to make sure I didn't miss
>>>>> anything before we start to make this super fancy again. Further
>>>>> patches on top is obviously all fine with me.
>>>>> -Daniel
>>>> Yes, but there are two different things you remove here. One is the
>>>> workaround for the locking reversal, which is obviously correct.
>>>>
>>>> One is TTM's implementation of the mmap_sem latency optimization, which
>>>> looks like an oversight.
>>>>
>>>> That optimization is why the VM_FAULT_RETRY functionality was added to
>>>> mm in the first place and is intended to be used when drivers expect a
>>>> substantial sleep to avoid keeping the pretty globalish mmap_sem held
>>>> while that sleep is taking place. We do exactly the same while waiting
>>>> for move-fences (ttm_bo_vm_fault_idle) and other drivers that expect
>>>> long waits in the fault handler do the same.
>>> Hm, are we guaranteed that core mm will only call us once with
>>> ALLOW_RETRY?
>> Last time I looked in the implementation, yes. The ALLOW_RETRY was there
>> to specifically
>> allow making progress in the locking.
>>
>>>    Just to make sure that we're not live-locking here. I'd
>>> also like to get rid of the wu_mutex, that just looks really strange
>>> (and I thought it was to duct-tape over the inversion, not as an
>>> optimization). If the livelock due to locking inversion is gone I have
>>> no idea anymore why we even needs the wu_mutex.
>> Yes, my interpretation of this is that wu_mutex definitely can be ditched.
> Ok I'll respin and just do normal interruptible locks, keeping the
> mmap_sem-less path. I think perfectly ok to leave the optimization in,
> as long as we can remove the wu_mutex.
>
> btw r-b/t-b on patch 1 from vmwgfx would be very much appreciated.
> That one is the real trick in this series here I think.

Thanks!

I'll look into patch 1 as well.

/Thomas



> -Daniel

Patch
diff mbox series

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 20ff56f27aa4..a952dd624b06 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1954,37 +1954,3 @@  void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
 		;
 }
 EXPORT_SYMBOL(ttm_bo_swapout_all);
-
-/**
- * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
- * unreserved
- *
- * @bo: Pointer to buffer
- */
-int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
-{
-	int ret;
-
-	/*
-	 * In the absense of a wait_unlocked API,
-	 * Use the bo::wu_mutex to avoid triggering livelocks due to
-	 * concurrent use of this function. Note that this use of
-	 * bo::wu_mutex can go away if we change locking order to
-	 * mmap_sem -> bo::reserve.
-	 */
-	ret = mutex_lock_interruptible(&bo->wu_mutex);
-	if (unlikely(ret != 0))
-		return -ERESTARTSYS;
-	if (!dma_resv_is_locked(bo->base.resv))
-		goto out_unlock;
-	ret = dma_resv_lock_interruptible(bo->base.resv, NULL);
-	if (ret == -EINTR)
-		ret = -ERESTARTSYS;
-	if (unlikely(ret != 0))
-		goto out_unlock;
-	dma_resv_unlock(bo->base.resv);
-
-out_unlock:
-	mutex_unlock(&bo->wu_mutex);
-	return ret;
-}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 76eedb963693..505e1787aeea 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -125,31 +125,7 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		&bdev->man[bo->mem.mem_type];
 	struct vm_area_struct cvma;
 
-	/*
-	 * Work around locking order reversal in fault / nopfn
-	 * between mmap_sem and bo_reserve: Perform a trylock operation
-	 * for reserve, and if it fails, retry the fault after waiting
-	 * for the buffer to become unreserved.
-	 */
-	if (unlikely(!dma_resv_trylock(bo->base.resv))) {
-		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
-				ttm_bo_get(bo);
-				up_read(&vmf->vma->vm_mm->mmap_sem);
-				(void) ttm_bo_wait_unreserved(bo);
-				ttm_bo_put(bo);
-			}
-
-			return VM_FAULT_RETRY;
-		}
-
-		/*
-		 * If we'd want to change locking order to
-		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
-		 * instead of retrying the fault...
-		 */
-		return VM_FAULT_NOPAGE;
-	}
+	dma_resv_lock(bo->base.resv, NULL);
 
 	/*
 	 * Refuse to fault imported pages. This should be handled
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 43c4929a2171..6b50e624e3e2 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -765,7 +765,6 @@  ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
 int ttm_bo_swapout(struct ttm_bo_global *glob,
 			struct ttm_operation_ctx *ctx);
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
-int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_uses_embedded_gem_object - check if the given bo uses the