diff mbox

drm/radeon: fence virtual address and free it once idle v3

Message ID 1344029208-10069-1-git-send-email-j.glisse@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jerome Glisse Aug. 3, 2012, 9:26 p.m. UTC
From: Jerome Glisse <jglisse@redhat.com>

Virtual addresses need to be fenced to know when we can safely remove them.
This patch also properly clears the pagetable. Previously it was
seriously broken.

Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking.

v2: Force update of the pagetable when unbinding a bo (don't bail out if
    bo_va->valid is true).
v3: Add kernel 3.5/3.4 comment.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |    1 +
 drivers/gpu/drm/radeon/radeon_cs.c     |   32 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
 drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
 drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
 5 files changed, 55 insertions(+), 21 deletions(-)

Comments

Christian König Aug. 4, 2012, 9:07 a.m. UTC | #1
On 03.08.2012 23:26, j.glisse@gmail.com wrote:
> From: Jerome Glisse <jglisse@redhat.com>
>
> Virtual address need to be fenced to know when we can safely remove it.
> This patch also properly clear the pagetable. Previously it was
> serouisly broken.
>
> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking.
>
> v2: For to update pagetable when unbinding bo (don't bailout if
>      bo_va->valid is true).
> v3: Add kernel 3.5/3.4 comment.
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
It should fix the problem, going to test it as soon as I find some 5min 
spare in my vacation. Till then it is:

Reviewed-by: Christian König <christian.koenig@amd.com>

In the future I would really like to make the updating of the PTE also 
async, that should save us a lot of problems here.

Christian.


> ---
>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>   drivers/gpu/drm/radeon/radeon_cs.c     |   32 +++++++++++++++++++++++++++++---
>   drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>   drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>   drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>   5 files changed, 55 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5431af2..8d75c65 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>   	uint64_t			soffset;
>   	uint64_t			eoffset;
>   	uint32_t			flags;
> +	struct radeon_fence		*fence;
>   	bool				valid;
>   };
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 8a4c49e..995f3ab 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>   	return 0;
>   }
>   
> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
> +				  struct radeon_fence *fence)
> +{
> +	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
> +	struct radeon_vm *vm = &fpriv->vm;
> +	struct radeon_bo_list *lobj;
> +	int r;
> +
> +	if (parser->chunk_ib_idx == -1)
> +		return;
> +	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
> +		return;
> +
> +	list_for_each_entry(lobj, &parser->validated, tv.head) {
> +		struct radeon_bo_va *bo_va;
> +		struct radeon_bo *rbo = lobj->bo;
> +
> +		bo_va = radeon_bo_va(rbo, vm);
> +		radeon_fence_unref(&bo_va->fence);
> +		bo_va->fence = radeon_fence_ref(fence);
> +	}
> +	return 0;
> +}
> +
>   /**
>    * cs_parser_fini() - clean parser states
>    * @parser:	parser structure holding parsing context.
> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>   {
>   	unsigned i;
>   
> -	if (!error)
> +	if (!error) {
> +		/* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */
> +		radeon_bo_vm_fence_va(parser, parser->ib.fence);
>   		ttm_eu_fence_buffer_objects(&parser->validated,
>   					    parser->ib.fence);
> -	else
> +	} else {
>   		ttm_eu_backoff_reservation(&parser->validated);
> +	}
>   
>   	if (parser->relocs != NULL) {
>   		for (i = 0; i < parser->nrelocs; i++) {
> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
>   
>   	if (parser->chunk_ib_idx == -1)
>   		return 0;
> -
>   	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>   		return 0;
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index b372005..9912182 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>   		return -EINVAL;
>   	}
>   
> -	if (bo_va->valid)
> +	if (bo_va->valid && mem)
>   		return 0;
>   
>   	ngpu_pages = radeon_bo_ngpu_pages(bo);
> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>   		     struct radeon_bo *bo)
>   {
>   	struct radeon_bo_va *bo_va;
> +	int r;
>   
>   	bo_va = radeon_bo_va(bo, vm);
>   	if (bo_va == NULL)
>   		return 0;
>   
> +	/* wait for va use to end */
> +	while (bo_va->fence) {
> +		r = radeon_fence_wait(bo_va->fence, false);
> +		if (r) {
> +			DRM_ERROR("error while waiting for fence: %d\n", r);
> +		}
> +		if (r == -EDEADLK) {
> +			r = radeon_gpu_reset(rdev);
> +			if (!r)
> +				continue;
> +		}
> +		break;
> +	}
> +	radeon_fence_unref(&bo_va->fence);
> +
>   	mutex_lock(&rdev->vm_manager.lock);
>   	mutex_lock(&vm->mutex);
>   	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>   	radeon_vm_unbind_locked(rdev, vm);
>   	mutex_unlock(&rdev->vm_manager.lock);
>   
> -	/* remove all bo */
> +	/* remove all bo at this point non are busy any more because unbind
> +	 * waited for the last vm fence to signal
> +	 */
>   	r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>   	if (!r) {
>   		bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>   		list_del_init(&bo_va->bo_list);
>   		list_del_init(&bo_va->vm_list);
> +		radeon_fence_unref(&bo_va->fence);
>   		radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>   		kfree(bo_va);
>   	}
> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>   		r = radeon_bo_reserve(bo_va->bo, false);
>   		if (!r) {
>   			list_del_init(&bo_va->bo_list);
> +			radeon_fence_unref(&bo_va->fence);
>   			radeon_bo_unreserve(bo_va->bo);
>   			kfree(bo_va);
>   		}
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index 84d0452..1b57b00 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj,
>   	struct radeon_device *rdev = rbo->rdev;
>   	struct radeon_fpriv *fpriv = file_priv->driver_priv;
>   	struct radeon_vm *vm = &fpriv->vm;
> -	struct radeon_bo_va *bo_va, *tmp;
>   
>   	if (rdev->family < CHIP_CAYMAN) {
>   		return;
>   	}
>   
>   	if (radeon_bo_reserve(rbo, false)) {
> +		dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n");
>   		return;
>   	}
> -	list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
> -		if (bo_va->vm == vm) {
> -			/* remove from this vm address space */
> -			mutex_lock(&vm->mutex);
> -			list_del(&bo_va->vm_list);
> -			mutex_unlock(&vm->mutex);
> -			list_del(&bo_va->bo_list);
> -			kfree(bo_va);
> -		}
> -	}
> +	radeon_vm_bo_rmv(rdev, vm, rbo);
>   	radeon_bo_unreserve(rbo);
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 1f1a4c8..1cb014b 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>   
>   	list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>   		/* remove from all vm address space */
> -		mutex_lock(&bo_va->vm->mutex);
> -		list_del(&bo_va->vm_list);
> -		mutex_unlock(&bo_va->vm->mutex);
> -		list_del(&bo_va->bo_list);
> -		kfree(bo_va);
> +		radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>   	}
>   }
>
Alex Deucher Aug. 4, 2012, 1:29 p.m. UTC | #2
On Fri, Aug 3, 2012 at 5:26 PM,  <j.glisse@gmail.com> wrote:
> From: Jerome Glisse <jglisse@redhat.com>
>
> Virtual address need to be fenced to know when we can safely remove it.
> This patch also properly clear the pagetable. Previously it was
> serouisly broken.
>
> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking.
>
> v2: For to update pagetable when unbinding bo (don't bailout if
>     bo_va->valid is true).
> v3: Add kernel 3.5/3.4 comment.
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/radeon/radeon.h        |    1 +
>  drivers/gpu/drm/radeon/radeon_cs.c     |   32 +++++++++++++++++++++++++++++---
>  drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>  drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>  drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>  5 files changed, 55 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5431af2..8d75c65 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>         uint64_t                        soffset;
>         uint64_t                        eoffset;
>         uint32_t                        flags;
> +       struct radeon_fence             *fence;
>         bool                            valid;
>  };
>
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 8a4c49e..995f3ab 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>         return 0;
>  }
>
> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
> +                                 struct radeon_fence *fence)
> +{
> +       struct radeon_fpriv *fpriv = parser->filp->driver_priv;
> +       struct radeon_vm *vm = &fpriv->vm;
> +       struct radeon_bo_list *lobj;
> +       int r;
> +
> +       if (parser->chunk_ib_idx == -1)
> +               return;
> +       if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
> +               return;
> +
> +       list_for_each_entry(lobj, &parser->validated, tv.head) {
> +               struct radeon_bo_va *bo_va;
> +               struct radeon_bo *rbo = lobj->bo;
> +
> +               bo_va = radeon_bo_va(rbo, vm);
> +               radeon_fence_unref(&bo_va->fence);
> +               bo_va->fence = radeon_fence_ref(fence);
> +       }
> +       return 0;
> +}
> +
>  /**
>   * cs_parser_fini() - clean parser states
>   * @parser:    parser structure holding parsing context.
> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>  {
>         unsigned i;
>
> -       if (!error)
> +       if (!error) {
> +               /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */
> +               radeon_bo_vm_fence_va(parser, parser->ib.fence);
>                 ttm_eu_fence_buffer_objects(&parser->validated,
>                                             parser->ib.fence);
> -       else
> +       } else {
>                 ttm_eu_backoff_reservation(&parser->validated);
> +       }
>
>         if (parser->relocs != NULL) {
>                 for (i = 0; i < parser->nrelocs; i++) {
> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
>
>         if (parser->chunk_ib_idx == -1)
>                 return 0;
> -
>         if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>                 return 0;
>
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index b372005..9912182 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>                 return -EINVAL;
>         }
>
> -       if (bo_va->valid)
> +       if (bo_va->valid && mem)
>                 return 0;
>
>         ngpu_pages = radeon_bo_ngpu_pages(bo);
> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>                      struct radeon_bo *bo)
>  {
>         struct radeon_bo_va *bo_va;
> +       int r;
>
>         bo_va = radeon_bo_va(bo, vm);
>         if (bo_va == NULL)
>                 return 0;
>
> +       /* wait for va use to end */
> +       while (bo_va->fence) {
> +               r = radeon_fence_wait(bo_va->fence, false);
> +               if (r) {
> +                       DRM_ERROR("error while waiting for fence: %d\n", r);
> +               }
> +               if (r == -EDEADLK) {
> +                       r = radeon_gpu_reset(rdev);
> +                       if (!r)
> +                               continue;
> +               }
> +               break;
> +       }
> +       radeon_fence_unref(&bo_va->fence);
> +
>         mutex_lock(&rdev->vm_manager.lock);
>         mutex_lock(&vm->mutex);
>         radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>         radeon_vm_unbind_locked(rdev, vm);
>         mutex_unlock(&rdev->vm_manager.lock);
>
> -       /* remove all bo */
> +       /* remove all bo at this point non are busy any more because unbind
> +        * waited for the last vm fence to signal
> +        */
>         r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>         if (!r) {
>                 bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>                 list_del_init(&bo_va->bo_list);
>                 list_del_init(&bo_va->vm_list);
> +               radeon_fence_unref(&bo_va->fence);
>                 radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>                 kfree(bo_va);
>         }
> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>                 r = radeon_bo_reserve(bo_va->bo, false);
>                 if (!r) {
>                         list_del_init(&bo_va->bo_list);
> +                       radeon_fence_unref(&bo_va->fence);
>                         radeon_bo_unreserve(bo_va->bo);
>                         kfree(bo_va);
>                 }
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index 84d0452..1b57b00 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj,
>         struct radeon_device *rdev = rbo->rdev;
>         struct radeon_fpriv *fpriv = file_priv->driver_priv;
>         struct radeon_vm *vm = &fpriv->vm;
> -       struct radeon_bo_va *bo_va, *tmp;
>
>         if (rdev->family < CHIP_CAYMAN) {
>                 return;
>         }
>
>         if (radeon_bo_reserve(rbo, false)) {
> +               dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n");
>                 return;
>         }
> -       list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
> -               if (bo_va->vm == vm) {
> -                       /* remove from this vm address space */
> -                       mutex_lock(&vm->mutex);
> -                       list_del(&bo_va->vm_list);
> -                       mutex_unlock(&vm->mutex);
> -                       list_del(&bo_va->bo_list);
> -                       kfree(bo_va);
> -               }
> -       }
> +       radeon_vm_bo_rmv(rdev, vm, rbo);
>         radeon_bo_unreserve(rbo);
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 1f1a4c8..1cb014b 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>
>         list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>                 /* remove from all vm address space */
> -               mutex_lock(&bo_va->vm->mutex);
> -               list_del(&bo_va->vm_list);
> -               mutex_unlock(&bo_va->vm->mutex);
> -               list_del(&bo_va->bo_list);
> -               kfree(bo_va);
> +               radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>         }
>  }
>
> --
> 1.7.10.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Aug. 6, 2012, 4:06 p.m. UTC | #3
On 04.08.2012 11:07, Christian König wrote:
> On 03.08.2012 23:26, j.glisse@gmail.com wrote:
>> From: Jerome Glisse <jglisse@redhat.com>
>>
>> Virtual address need to be fenced to know when we can safely remove it.
>> This patch also properly clear the pagetable. Previously it was
>> serouisly broken.
>>
>> Kernel 3.5/3.4 need a similar patch but adapted for difference in 
>> mutex locking.
>>
>> v2: For to update pagetable when unbinding bo (don't bailout if
>>      bo_va->valid is true).
>> v3: Add kernel 3.5/3.4 comment.
>>
>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> It should fix the problem, going to test it as soon as I find some 
> 5min spare in my vacation. Till then it is:
>
> Reviewed-by: Christian König <christian.koenig@amd.com>
>
> In the future I would really like to make the updating of the PTE also 
> async, that should save us allot of problems here.

Had time today to test that a bit more. Found two minor notes in the 
patch, see below.

>
>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>>   drivers/gpu/drm/radeon/radeon_cs.c     |   32 
>> +++++++++++++++++++++++++++++---
>>   drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>>   drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>>   drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>>   5 files changed, 55 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h 
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 5431af2..8d75c65 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>>       uint64_t            soffset;
>>       uint64_t            eoffset;
>>       uint32_t            flags;
>> +    struct radeon_fence        *fence;
>>       bool                valid;
>>   };
>>   diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
>> b/drivers/gpu/drm/radeon/radeon_cs.c
>> index 8a4c49e..995f3ab 100644
>> --- a/drivers/gpu/drm/radeon/radeon_cs.c
>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct 
>> radeon_cs_parser *p, void *data)
>>       return 0;
>>   }
>>   +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
>> +                  struct radeon_fence *fence)
>> +{
>> +    struct radeon_fpriv *fpriv = parser->filp->driver_priv;
>> +    struct radeon_vm *vm = &fpriv->vm;
>> +    struct radeon_bo_list *lobj;

>> +    int r;
"r" is unused here, please remove.

>> +
>> +    if (parser->chunk_ib_idx == -1)
>> +        return;
>> +    if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>> +        return;
>> +
>> +    list_for_each_entry(lobj, &parser->validated, tv.head) {
>> +        struct radeon_bo_va *bo_va;
>> +        struct radeon_bo *rbo = lobj->bo;
>> +
>> +        bo_va = radeon_bo_va(rbo, vm);
>> +        radeon_fence_unref(&bo_va->fence);
>> +        bo_va->fence = radeon_fence_ref(fence);
>> +    }

>> +    return 0;
The function doesn't return an error value, so this should just be a 
"return" without value.

>> +}
>> +
>>   /**
>>    * cs_parser_fini() - clean parser states
>>    * @parser:    parser structure holding parsing context.
>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct 
>> radeon_cs_parser *parser, int error)
>>   {
>>       unsigned i;
>>   -    if (!error)
>> +    if (!error) {
>> +        /* fence all bo va before ttm_eu_fence_buffer_objects so bo 
>> are still reserved */
>> +        radeon_bo_vm_fence_va(parser, parser->ib.fence);
>>           ttm_eu_fence_buffer_objects(&parser->validated,
>>                           parser->ib.fence);
>> -    else
>> +    } else {
>>           ttm_eu_backoff_reservation(&parser->validated);
>> +    }
>>         if (parser->relocs != NULL) {
>>           for (i = 0; i < parser->nrelocs; i++) {
>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct 
>> radeon_device *rdev,
>>         if (parser->chunk_ib_idx == -1)
>>           return 0;
>> -
>>       if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>           return 0;
>>   diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
>> b/drivers/gpu/drm/radeon/radeon_gart.c
>> index b372005..9912182 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device 
>> *rdev,
>>           return -EINVAL;
>>       }
>>   -    if (bo_va->valid)
>> +    if (bo_va->valid && mem)
>>           return 0;
>>         ngpu_pages = radeon_bo_ngpu_pages(bo);
>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>                struct radeon_bo *bo)
>>   {
>>       struct radeon_bo_va *bo_va;
>> +    int r;
>>         bo_va = radeon_bo_va(bo, vm);
>>       if (bo_va == NULL)
>>           return 0;
>>   +    /* wait for va use to end */
>> +    while (bo_va->fence) {
>> +        r = radeon_fence_wait(bo_va->fence, false);
>> +        if (r) {
>> +            DRM_ERROR("error while waiting for fence: %d\n", r);
>> +        }
>> +        if (r == -EDEADLK) {
>> +            r = radeon_gpu_reset(rdev);
>> +            if (!r)
>> +                continue;
>> +        }
>> +        break;
>> +    }
>> +    radeon_fence_unref(&bo_va->fence);
>> +
>>       mutex_lock(&rdev->vm_manager.lock);
>>       mutex_lock(&vm->mutex);
>>       radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, 
>> struct radeon_vm *vm)
>>       radeon_vm_unbind_locked(rdev, vm);
>>       mutex_unlock(&rdev->vm_manager.lock);
>>   -    /* remove all bo */
>> +    /* remove all bo at this point non are busy any more because unbind
>> +     * waited for the last vm fence to signal
>> +     */
>>       r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>>       if (!r) {
>>           bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>>           list_del_init(&bo_va->bo_list);
>>           list_del_init(&bo_va->vm_list);
>> +        radeon_fence_unref(&bo_va->fence);
>>           radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>>           kfree(bo_va);
>>       }
>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, 
>> struct radeon_vm *vm)
>>           r = radeon_bo_reserve(bo_va->bo, false);
>>           if (!r) {
>>               list_del_init(&bo_va->bo_list);
>> +            radeon_fence_unref(&bo_va->fence);
>>               radeon_bo_unreserve(bo_va->bo);
>>               kfree(bo_va);
>>           }
>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
>> b/drivers/gpu/drm/radeon/radeon_gem.c
>> index 84d0452..1b57b00 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct 
>> drm_gem_object *obj,
>>       struct radeon_device *rdev = rbo->rdev;
>>       struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>       struct radeon_vm *vm = &fpriv->vm;
>> -    struct radeon_bo_va *bo_va, *tmp;
>>         if (rdev->family < CHIP_CAYMAN) {
>>           return;
>>       }
>>         if (radeon_bo_reserve(rbo, false)) {
>> +        dev_err(rdev->dev, "leaking bo va because we fail to reserve 
>> bo\n");
>>           return;
>>       }
>> -    list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
>> -        if (bo_va->vm == vm) {
>> -            /* remove from this vm address space */
>> -            mutex_lock(&vm->mutex);
>> -            list_del(&bo_va->vm_list);
>> -            mutex_unlock(&vm->mutex);
>> -            list_del(&bo_va->bo_list);
>> -            kfree(bo_va);
>> -        }
>> -    }
>> +    radeon_vm_bo_rmv(rdev, vm, rbo);
>>       radeon_bo_unreserve(rbo);
>>   }
>>   diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
>> b/drivers/gpu/drm/radeon/radeon_object.c
>> index 1f1a4c8..1cb014b 100644
>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>>         list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>>           /* remove from all vm address space */
>> -        mutex_lock(&bo_va->vm->mutex);
>> -        list_del(&bo_va->vm_list);
>> -        mutex_unlock(&bo_va->vm->mutex);
>> -        list_del(&bo_va->bo_list);
>> -        kfree(bo_va);
>> +        radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>>       }
>>   }
>

Additional to that patch we still need a minor fix to mesa (just move 
freeing the VM range after closing the handle). Going to send that in 
the next minute to the mesa-dev list.

Otherwise the patch indeed fixes the problem, but also isn't the best 
idea for performance.

Cheers,
Christian.
Jerome Glisse Aug. 6, 2012, 4:30 p.m. UTC | #4
On Mon, Aug 6, 2012 at 12:06 PM, Christian König
<deathsimple@vodafone.de> wrote:
> On 04.08.2012 11:07, Christian König wrote:
>>
>> On 03.08.2012 23:26, j.glisse@gmail.com wrote:
>>>
>>> From: Jerome Glisse <jglisse@redhat.com>
>>>
>>> Virtual address need to be fenced to know when we can safely remove it.
>>> This patch also properly clear the pagetable. Previously it was
>>> serouisly broken.
>>>
>>> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex
>>> locking.
>>>
>>> v2: For to update pagetable when unbinding bo (don't bailout if
>>>      bo_va->valid is true).
>>> v3: Add kernel 3.5/3.4 comment.
>>>
>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>
>> It should fix the problem, going to test it as soon as I find some 5min
>> spare in my vacation. Till then it is:
>>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> In the future I would really like to make the updating of the PTE also
>> async, that should save us allot of problems here.
>
>
> Had time today to test that a bit more. Found two minor notes in the patch,
> see below.
>
>
>>
>>
>>> ---
>>>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>>>   drivers/gpu/drm/radeon/radeon_cs.c     |   32
>>> +++++++++++++++++++++++++++++---
>>>   drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>>>   drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>>>   drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>>>   5 files changed, 55 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>> b/drivers/gpu/drm/radeon/radeon.h
>>> index 5431af2..8d75c65 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>>>       uint64_t            soffset;
>>>       uint64_t            eoffset;
>>>       uint32_t            flags;
>>> +    struct radeon_fence        *fence;
>>>       bool                valid;
>>>   };
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_cs.c
>>> b/drivers/gpu/drm/radeon/radeon_cs.c
>>> index 8a4c49e..995f3ab 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_cs.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
>>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser
>>> *p, void *data)
>>>       return 0;
>>>   }
>>>   +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
>>> +                  struct radeon_fence *fence)
>>> +{
>>> +    struct radeon_fpriv *fpriv = parser->filp->driver_priv;
>>> +    struct radeon_vm *vm = &fpriv->vm;
>>> +    struct radeon_bo_list *lobj;
>
>
>>> +    int r;
>
> "r" is unused here, please remove.
>
>
>>> +
>>> +    if (parser->chunk_ib_idx == -1)
>>> +        return;
>>> +    if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>> +        return;
>>> +
>>> +    list_for_each_entry(lobj, &parser->validated, tv.head) {
>>> +        struct radeon_bo_va *bo_va;
>>> +        struct radeon_bo *rbo = lobj->bo;
>>> +
>>> +        bo_va = radeon_bo_va(rbo, vm);
>>> +        radeon_fence_unref(&bo_va->fence);
>>> +        bo_va->fence = radeon_fence_ref(fence);
>>> +    }
>
>
>>> +    return 0;
>
> The function doesn't return an error value, so this should just be a
> "return" without value.
>
>
>>> +}
>>> +
>>>   /**
>>>    * cs_parser_fini() - clean parser states
>>>    * @parser:    parser structure holding parsing context.
>>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct
>>> radeon_cs_parser *parser, int error)
>>>   {
>>>       unsigned i;
>>>   -    if (!error)
>>> +    if (!error) {
>>> +        /* fence all bo va before ttm_eu_fence_buffer_objects so bo are
>>> still reserved */
>>> +        radeon_bo_vm_fence_va(parser, parser->ib.fence);
>>>           ttm_eu_fence_buffer_objects(&parser->validated,
>>>                           parser->ib.fence);
>>> -    else
>>> +    } else {
>>>           ttm_eu_backoff_reservation(&parser->validated);
>>> +    }
>>>         if (parser->relocs != NULL) {
>>>           for (i = 0; i < parser->nrelocs; i++) {
>>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device
>>> *rdev,
>>>         if (parser->chunk_ib_idx == -1)
>>>           return 0;
>>> -
>>>       if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>>           return 0;
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_gart.c
>>> b/drivers/gpu/drm/radeon/radeon_gart.c
>>> index b372005..9912182 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device
>>> *rdev,
>>>           return -EINVAL;
>>>       }
>>>   -    if (bo_va->valid)
>>> +    if (bo_va->valid && mem)
>>>           return 0;
>>>         ngpu_pages = radeon_bo_ngpu_pages(bo);
>>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>>                struct radeon_bo *bo)
>>>   {
>>>       struct radeon_bo_va *bo_va;
>>> +    int r;
>>>         bo_va = radeon_bo_va(bo, vm);
>>>       if (bo_va == NULL)
>>>           return 0;
>>>   +    /* wait for va use to end */
>>> +    while (bo_va->fence) {
>>> +        r = radeon_fence_wait(bo_va->fence, false);
>>> +        if (r) {
>>> +            DRM_ERROR("error while waiting for fence: %d\n", r);
>>> +        }
>>> +        if (r == -EDEADLK) {
>>> +            r = radeon_gpu_reset(rdev);
>>> +            if (!r)
>>> +                continue;
>>> +        }
>>> +        break;
>>> +    }
>>> +    radeon_fence_unref(&bo_va->fence);
>>> +
>>>       mutex_lock(&rdev->vm_manager.lock);
>>>       mutex_lock(&vm->mutex);
>>>       radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev,
>>> struct radeon_vm *vm)
>>>       radeon_vm_unbind_locked(rdev, vm);
>>>       mutex_unlock(&rdev->vm_manager.lock);
>>>   -    /* remove all bo */
>>> +    /* remove all bo at this point non are busy any more because unbind
>>> +     * waited for the last vm fence to signal
>>> +     */
>>>       r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>>>       if (!r) {
>>>           bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>>>           list_del_init(&bo_va->bo_list);
>>>           list_del_init(&bo_va->vm_list);
>>> +        radeon_fence_unref(&bo_va->fence);
>>>           radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>>>           kfree(bo_va);
>>>       }
>>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev,
>>> struct radeon_vm *vm)
>>>           r = radeon_bo_reserve(bo_va->bo, false);
>>>           if (!r) {
>>>               list_del_init(&bo_va->bo_list);
>>> +            radeon_fence_unref(&bo_va->fence);
>>>               radeon_bo_unreserve(bo_va->bo);
>>>               kfree(bo_va);
>>>           }
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>> index 84d0452..1b57b00 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct radeon_device *rdev = rbo->rdev;
>>>       struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>>       struct radeon_vm *vm = &fpriv->vm;
>>> -    struct radeon_bo_va *bo_va, *tmp;
>>>         if (rdev->family < CHIP_CAYMAN) {
>>>           return;
>>>       }
>>>         if (radeon_bo_reserve(rbo, false)) {
>>> +        dev_err(rdev->dev, "leaking bo va because we fail to reserve
>>> bo\n");
>>>           return;
>>>       }
>>> -    list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
>>> -        if (bo_va->vm == vm) {
>>> -            /* remove from this vm address space */
>>> -            mutex_lock(&vm->mutex);
>>> -            list_del(&bo_va->vm_list);
>>> -            mutex_unlock(&vm->mutex);
>>> -            list_del(&bo_va->bo_list);
>>> -            kfree(bo_va);
>>> -        }
>>> -    }
>>> +    radeon_vm_bo_rmv(rdev, vm, rbo);
>>>       radeon_bo_unreserve(rbo);
>>>   }
>>>   diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>>> b/drivers/gpu/drm/radeon/radeon_object.c
>>> index 1f1a4c8..1cb014b 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>>>         list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>>>           /* remove from all vm address space */
>>> -        mutex_lock(&bo_va->vm->mutex);
>>> -        list_del(&bo_va->vm_list);
>>> -        mutex_unlock(&bo_va->vm->mutex);
>>> -        list_del(&bo_va->bo_list);
>>> -        kfree(bo_va);
>>> +        radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>>>       }
>>>   }
>>
>>
>
> Additional to that patch we still need a minor fix to mesa (just move
> freeing the VM range after closing the handle). Going to send that in the
> next minute to the mesa-dev list.
>
> Otherwise the patch indeed fixes the problem, but also isn't the best idea
> for performance.
>
> Cheers,
> Christian.
>

This does not impact performance, it's all about destroying bo/va so
it's not a performance path. Or am i missing something here ?

Cheers,
Jerome
Christian König Aug. 6, 2012, 4:55 p.m. UTC | #5
On 06.08.2012 18:30, Jerome Glisse wrote:
> On Mon, Aug 6, 2012 at 12:06 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> [SNIP]
>> Additional to that patch we still need a minor fix to mesa (just move
>> freeing the VM range after closing the handle). Going to send that in the
>> next minute to the mesa-dev list.
>>
>> Otherwise the patch indeed fixes the problem, but also isn't the best idea
>> for performance.
>>
>> Cheers,
>> Christian.
>>
> This does not impact performance, it's all about destroying bo/va so
> it's not a performance path. Or am i missing something here ?

radeonsi currently allocates very small BOs (8-32 bytes) for T# 
descriptors, and throws them away immediately after drawing.

That alone of course is insane under a performance aspect, but waiting 
for the last job to finish makes things completely worse.

It just moves my priorities of hacking on radeonsi a bit around.

Christian.
Michel Dänzer Aug. 8, 2012, 1:54 p.m. UTC | #6
On Mon, 2012-08-06 at 18:55 +0200, Christian König wrote: 
> On 06.08.2012 18:30, Jerome Glisse wrote:
> > On Mon, Aug 6, 2012 at 12:06 PM, Christian König
> > <deathsimple@vodafone.de> wrote:
> >> [SNIP]
> >> Additional to that patch we still need a minor fix to mesa (just move
> >> freeing the VM range after closing the handle). Going to send that in the
> >> next minute to the mesa-dev list.
> >>
> >> Otherwise the patch indeed fixes the problem, but also isn't the best idea
> >> for performance.
> >>
> > This does not impact performance, it's all about destroying bo/va so
> > it's not a performance path. Or am i missing something here ?
> 
> radeonsi currently allocates very small BOs (8-32 bytes) for T# 
> descriptors, and throws them away immediately after drawing.
> 
> That alone of course is insane under a performance aspect, but waiting 
> for the last job to finish makes things completely worse.

Absolutely, but I think blocking on BO destruction really is the
fundamental problem. To fix this, an alternative to your patch might be
the fenced buffer manager in src/gallium/auxiliary/pipebuffer/.
Jerome Glisse Aug. 8, 2012, 2:06 p.m. UTC | #7
On Mon, Aug 6, 2012 at 12:55 PM, Christian König
<deathsimple@vodafone.de> wrote:
> On 06.08.2012 18:30, Jerome Glisse wrote:
>>
>> On Mon, Aug 6, 2012 at 12:06 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> [SNIP]
>>>
>>> Additional to that patch we still need a minor fix to mesa (just move
>>> freeing the VM range after closing the handle). Going to send that in the
>>> next minute to the mesa-dev list.
>>>
>>> Otherwise the patch indeed fixes the problem, but also isn't the best
>>> idea
>>> for performance.
>>>
>>> Cheers,
>>> Christian.
>>>
>> This does not impact performance, it's all about destroying bo/va so
>> it's not a performance path. Or am i missing something here ?
>
>
> radeonsi currently allocates very small BOs (8-32 bytes) for T# descriptors,
> and throws them away immediately after drawing.
>
> That alone of course is insane under a performance aspect, but waiting for
> the last job to finish makes things completely worse.
>
> It just moves my priorities of hacking on radeonsi a bit around.
>
> Christian.

Well we could simply just not register any close callback for gem and
let the ttm bo delayed queue handle thing, that way userspace won't
stall.

Cheers,
Jerome
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5431af2..8d75c65 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -300,6 +300,7 @@  struct radeon_bo_va {
 	uint64_t			soffset;
 	uint64_t			eoffset;
 	uint32_t			flags;
+	struct radeon_fence		*fence;
 	bool				valid;
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 8a4c49e..995f3ab 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,6 +278,30 @@  int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	return 0;
 }
 
+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+				  struct radeon_fence *fence)
+{
+	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+	struct radeon_vm *vm = &fpriv->vm;
+	struct radeon_bo_list *lobj;
+	int r;
+
+	if (parser->chunk_ib_idx == -1)
+		return;
+	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+		return;
+
+	list_for_each_entry(lobj, &parser->validated, tv.head) {
+		struct radeon_bo_va *bo_va;
+		struct radeon_bo *rbo = lobj->bo;
+
+		bo_va = radeon_bo_va(rbo, vm);
+		radeon_fence_unref(&bo_va->fence);
+		bo_va->fence = radeon_fence_ref(fence);
+	}
+	return;
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
@@ -290,11 +314,14 @@  static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 {
 	unsigned i;
 
-	if (!error)
+	if (!error) {
+		/* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */
+		radeon_bo_vm_fence_va(parser, parser->ib.fence);
 		ttm_eu_fence_buffer_objects(&parser->validated,
 					    parser->ib.fence);
-	else
+	} else {
 		ttm_eu_backoff_reservation(&parser->validated);
+	}
 
 	if (parser->relocs != NULL) {
 		for (i = 0; i < parser->nrelocs; i++) {
@@ -388,7 +415,6 @@  static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 
 	if (parser->chunk_ib_idx == -1)
 		return 0;
-
 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
 		return 0;
 
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index b372005..9912182 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -814,7 +814,7 @@  int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		return -EINVAL;
 	}
 
-	if (bo_va->valid)
+	if (bo_va->valid && mem)
 		return 0;
 
 	ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -859,11 +859,27 @@  int radeon_vm_bo_rmv(struct radeon_device *rdev,
 		     struct radeon_bo *bo)
 {
 	struct radeon_bo_va *bo_va;
+	int r;
 
 	bo_va = radeon_bo_va(bo, vm);
 	if (bo_va == NULL)
 		return 0;
 
+	/* wait for va use to end */
+	while (bo_va->fence) {
+		r = radeon_fence_wait(bo_va->fence, false);
+		if (r) {
+			DRM_ERROR("error while waiting for fence: %d\n", r);
+		}
+		if (r == -EDEADLK) {
+			r = radeon_gpu_reset(rdev);
+			if (!r)
+				continue;
+		}
+		break;
+	}
+	radeon_fence_unref(&bo_va->fence);
+
 	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
 	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -952,12 +968,15 @@  void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	radeon_vm_unbind_locked(rdev, vm);
 	mutex_unlock(&rdev->vm_manager.lock);
 
-	/* remove all bo */
+	/* remove all bo at this point non are busy any more because unbind
+	 * waited for the last vm fence to signal
+	 */
 	r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 	if (!r) {
 		bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
 		list_del_init(&bo_va->bo_list);
 		list_del_init(&bo_va->vm_list);
+		radeon_fence_unref(&bo_va->fence);
 		radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
 		kfree(bo_va);
 	}
@@ -969,6 +988,7 @@  void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 		r = radeon_bo_reserve(bo_va->bo, false);
 		if (!r) {
 			list_del_init(&bo_va->bo_list);
+			radeon_fence_unref(&bo_va->fence);
 			radeon_bo_unreserve(bo_va->bo);
 			kfree(bo_va);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 84d0452..1b57b00 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -134,25 +134,16 @@  void radeon_gem_object_close(struct drm_gem_object *obj,
 	struct radeon_device *rdev = rbo->rdev;
 	struct radeon_fpriv *fpriv = file_priv->driver_priv;
 	struct radeon_vm *vm = &fpriv->vm;
-	struct radeon_bo_va *bo_va, *tmp;
 
 	if (rdev->family < CHIP_CAYMAN) {
 		return;
 	}
 
 	if (radeon_bo_reserve(rbo, false)) {
+		dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n");
 		return;
 	}
-	list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
-		if (bo_va->vm == vm) {
-			/* remove from this vm address space */
-			mutex_lock(&vm->mutex);
-			list_del(&bo_va->vm_list);
-			mutex_unlock(&vm->mutex);
-			list_del(&bo_va->bo_list);
-			kfree(bo_va);
-		}
-	}
+	radeon_vm_bo_rmv(rdev, vm, rbo);
 	radeon_bo_unreserve(rbo);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 1f1a4c8..1cb014b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -52,11 +52,7 @@  void radeon_bo_clear_va(struct radeon_bo *bo)
 
 	list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
 		/* remove from all vm address space */
-		mutex_lock(&bo_va->vm->mutex);
-		list_del(&bo_va->vm_list);
-		mutex_unlock(&bo_va->vm->mutex);
-		list_del(&bo_va->bo_list);
-		kfree(bo_va);
+		radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
 	}
 }