[RFC,10/10] drm/i915/vm_bind: Fix vm->vm_bind_mutex and vm->mutex nesting

Message ID 20220701225055.8204-11-niranjana.vishwanathapura@intel.com (mailing list archive)
State New, archived
Series drm/i915/vm_bind: Add VM_BIND functionality

Commit Message

Niranjana Vishwanathapura July 1, 2022, 10:50 p.m. UTC
VM_BIND functionality maintains the invariant that vm->vm_bind_mutex is
never taken while holding vm->mutex. However, while closing a 'vm',
vmas are destroyed while holding vm->mutex, and releasing a vma needs
to take vm->vm_bind_mutex in order to delete the vma from the
vm_bind_list. To avoid this lock inversion, destroy the vmas outside
vm->mutex while closing the 'vm'.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)
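
The deadlock being avoided is a classic lock-order inversion: the series documents vm->vm_bind_mutex as the outer lock, never to be taken while vm->mutex is held, yet destroying a vma under vm->mutex would need it to unlink the vma from the vm_bind_list. The fix is the standard "detach under the lock, release outside it" idiom. A minimal, generic user-space sketch of that idiom follows (plain C with pthreads; all names are illustrative, none of this is i915 code):

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node *next;
};

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* stands in for vm->vm_bind_mutex */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* stands in for vm->mutex */
static struct node *bound_list;	/* protected by 'inner' */

/* Release path: needs the OUTER lock, so it must never run under 'inner'. */
static void release_node(struct node *n)
{
	pthread_mutex_lock(&outer);
	/* ... unlink n from the bind list here ... */
	pthread_mutex_unlock(&outer);
	free(n);
}

/* Teardown: detach under 'inner', release only after dropping it. */
static void close_all(void)
{
	struct node *defer = NULL, *n;

	pthread_mutex_lock(&inner);
	while ((n = bound_list)) {	/* move, don't free, under 'inner' */
		bound_list = n->next;
		n->next = defer;
		defer = n;
	}
	pthread_mutex_unlock(&inner);

	while ((n = defer)) {		/* 'inner' is no longer held */
		defer = n->next;
		release_node(n);
	}
}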

Comments

Thomas Hellstrom July 5, 2022, 8:40 a.m. UTC | #1
On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
> VM_BIND functionality maintains the invariant that vm->vm_bind_mutex
> is never taken while holding vm->mutex. However, while closing a
> 'vm', vmas are destroyed while holding vm->mutex, and releasing a
> vma needs to take vm->vm_bind_mutex in order to delete the vma from
> the vm_bind_list. To avoid this lock inversion, destroy the vmas
> outside vm->mutex while closing the 'vm'.
> 
> Signed-off-by: Niranjana Vishwanathapura

First, when introducing a new feature like this, we should not need to
end the series with "Fix.." patches like this; rather, whatever needs
fixing should be fixed in the patch where the code was introduced.

Second, as an analogy with the Linux kernel CPU mapping code, could we
instead think of the vm_bind_lock as being similar to the mmap_lock and
the vm->mutex as similar to the i_mmap_lock, the former being used for
VA manipulation and the latter when attaching / removing the backing
store from the VA?

Then we would not need to take the vm_bind_lock from vma destruction,
since the VA would already have been reclaimed at that point. For vm
destruction here, we'd loop over all relevant vm_bind VAs under the
vm_bind_lock and call vm_unbind. Would that work?

/Thomas


> <niranjana.vishwanathapura@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gtt.c | 23 ++++++++++++++++++-----
>  1 file changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
> b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 4ab3bda644ff..4f707d0eb3ef 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -109,7 +109,8 @@ int map_pt_dma_locked(struct i915_address_space
> *vm, struct drm_i915_gem_object
>         return 0;
>  }
>  
> -static void clear_vm_list(struct list_head *list)
> +static void clear_vm_list(struct list_head *list,
> +                         struct list_head *destroy_list)
>  {
>         struct i915_vma *vma, *vn;
>  
> @@ -138,8 +139,7 @@ static void clear_vm_list(struct list_head *list)
>                         i915_vm_resv_get(vma->vm);
>                         vma->vm_ddestroy = true;
>                 } else {
> -                       i915_vma_destroy_locked(vma);
> -                       i915_gem_object_put(obj);
> +                       list_move_tail(&vma->vm_link, destroy_list);
>                 }
>  
>         }
> @@ -147,16 +147,29 @@ static void clear_vm_list(struct list_head
> *list)
>  
>  static void __i915_vm_close(struct i915_address_space *vm)
>  {
> +       struct i915_vma *vma, *vn;
> +       struct list_head list;
> +
> +       INIT_LIST_HEAD(&list);
> +
>         mutex_lock(&vm->mutex);
>  
> -       clear_vm_list(&vm->bound_list);
> -       clear_vm_list(&vm->unbound_list);
> +       clear_vm_list(&vm->bound_list, &list);
> +       clear_vm_list(&vm->unbound_list, &list);
>  
>         /* Check for must-fix unanticipated side-effects */
>         GEM_BUG_ON(!list_empty(&vm->bound_list));
>         GEM_BUG_ON(!list_empty(&vm->unbound_list));
>  
>         mutex_unlock(&vm->mutex);
> +
> +       /* Destroy vmas outside vm->mutex */
> +       list_for_each_entry_safe(vma, vn, &list, vm_link) {
> +               struct drm_i915_gem_object *obj = vma->obj;
> +
> +               i915_vma_destroy(vma);
> +               i915_gem_object_put(obj);
> +       }
>  }
>  
>  /* lock the vm into the current ww, if we lock one, we lock all */
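
For concreteness, a minimal sketch of the alternative Thomas proposes above: reclaim every bound VA under the vm_bind lock at vm destruction time, so vma destruction never needs that lock afterwards. The lock, list, and helper names (vm_bind_lock, vm_bind_list, vm_bound_list, vm_bind_link, i915_gem_vm_bind_remove()) follow the series' naming as referenced in this thread, but the signatures and the function below are assumptions for illustration, not code from the series:

static void i915_vm_unbind_all(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	/* Reclaim every VA under the outer lock, mirroring vm_unbind. */
	mutex_lock(&vm->vm_bind_lock);
	list_for_each_entry_safe(vma, vn, &vm->vm_bind_list, vm_bind_link)
		i915_gem_vm_bind_remove(vma);
	list_for_each_entry_safe(vma, vn, &vm->vm_bound_list, vm_bind_link)
		i915_gem_vm_bind_remove(vma);
	mutex_unlock(&vm->vm_bind_lock);

	/*
	 * With no VA left to unlink, subsequent vma destruction under
	 * vm->mutex no longer needs vm_bind_lock, and the inversion
	 * this patch works around cannot occur.
	 */
}
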
Ramalingam C July 6, 2022, 4:33 p.m. UTC | #2
On 2022-07-05 at 10:40:56 +0200, Thomas Hellström wrote:
> On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
> > VM_BIND functionality maintains the invariant that vm->vm_bind_mutex
> > is never taken while holding vm->mutex. However, while closing a
> > 'vm', vmas are destroyed while holding vm->mutex, and releasing a
> > vma needs to take vm->vm_bind_mutex in order to delete the vma from
> > the vm_bind_list. To avoid this lock inversion, destroy the vmas
> > outside vm->mutex while closing the 'vm'.
> > 
> > Signed-off-by: Niranjana Vishwanathapura
> 
> First, when introducing a new feature like this, we should not need to
> end the series with "Fix.." patches like this; rather, whatever needs
> fixing should be fixed in the patch where the code was introduced.
Thanks Thomas for the review. I will fix it.
> 
> Second, as an analogy with the Linux kernel CPU mapping code, could we
> instead think of the vm_bind_lock as being similar to the mmap_lock and
> the vm->mutex as similar to the i_mmap_lock, the former being used for
> VA manipulation and the latter when attaching / removing the backing
> store from the VA?
> 
> Then we would not need to take the vm_bind_lock from vma destruction,
> since the VA would already have been reclaimed at that point. For vm
> destruction here, we'd loop over all relevant vm_bind VAs under the
> vm_bind_lock and call vm_unbind. Would that work?

Sounds reasonable. I will try this locking approach.

Ram
> 
> /Thomas
> 
> 
> > <niranjana.vishwanathapura@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gtt.c | 23 ++++++++++++++++++-----
> >  1 file changed, 18 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > index 4ab3bda644ff..4f707d0eb3ef 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > @@ -109,7 +109,8 @@ int map_pt_dma_locked(struct i915_address_space
> > *vm, struct drm_i915_gem_object
> >         return 0;
> >  }
> >  
> > -static void clear_vm_list(struct list_head *list)
> > +static void clear_vm_list(struct list_head *list,
> > +                         struct list_head *destroy_list)
> >  {
> >         struct i915_vma *vma, *vn;
> >  
> > @@ -138,8 +139,7 @@ static void clear_vm_list(struct list_head *list)
> >                         i915_vm_resv_get(vma->vm);
> >                         vma->vm_ddestroy = true;
> >                 } else {
> > -                       i915_vma_destroy_locked(vma);
> > -                       i915_gem_object_put(obj);
> > +                       list_move_tail(&vma->vm_link, destroy_list);
> >                 }
> >  
> >         }
> > @@ -147,16 +147,29 @@ static void clear_vm_list(struct list_head
> > *list)
> >  
> >  static void __i915_vm_close(struct i915_address_space *vm)
> >  {
> > +       struct i915_vma *vma, *vn;
> > +       struct list_head list;
> > +
> > +       INIT_LIST_HEAD(&list);
> > +
> >         mutex_lock(&vm->mutex);
> >  
> > -       clear_vm_list(&vm->bound_list);
> > -       clear_vm_list(&vm->unbound_list);
> > +       clear_vm_list(&vm->bound_list, &list);
> > +       clear_vm_list(&vm->unbound_list, &list);
> >  
> >         /* Check for must-fix unanticipated side-effects */
> >         GEM_BUG_ON(!list_empty(&vm->bound_list));
> >         GEM_BUG_ON(!list_empty(&vm->unbound_list));
> >  
> >         mutex_unlock(&vm->mutex);
> > +
> > +       /* Destroy vmas outside vm->mutex */
> > +       list_for_each_entry_safe(vma, vn, &list, vm_link) {
> > +               struct drm_i915_gem_object *obj = vma->obj;
> > +
> > +               i915_vma_destroy(vma);
> > +               i915_gem_object_put(obj);
> > +       }
> >  }
> >  
> >  /* lock the vm into the current ww, if we lock one, we lock all */
>
Niranjana Vishwanathapura July 7, 2022, 5:56 a.m. UTC | #3
On Tue, Jul 05, 2022 at 10:40:56AM +0200, Thomas Hellström wrote:
>On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
>> VM_BIND functionality maintains the invariant that vm->vm_bind_mutex
>> is never taken while holding vm->mutex. However, while closing a
>> 'vm', vmas are destroyed while holding vm->mutex, and releasing a
>> vma needs to take vm->vm_bind_mutex in order to delete the vma from
>> the vm_bind_list. To avoid this lock inversion, destroy the vmas
>> outside vm->mutex while closing the 'vm'.
>>
>> Signed-off-by: Niranjana Vishwanathapura
>
>First, when introducing a new feature like this, we should not need to
>end the series with "Fix.." patches like this; rather, whatever needs
>fixing should be fixed in the patch where the code was introduced.
>

Yah, makes sense.

>Second, as an analogy with the Linux kernel CPU mapping code, could we
>instead think of the vm_bind_lock as being similar to the mmap_lock and
>the vm->mutex as similar to the i_mmap_lock, the former being used for
>VA manipulation and the latter when attaching / removing the backing
>store from the VA?
>
>Then we would not need to take the vm_bind_lock from vma destruction,
>since the VA would already have been reclaimed at that point. For vm
>destruction here, we'd loop over all relevant vm_bind VAs under the
>vm_bind_lock and call vm_unbind. Would that work?
>

Yah. In fact, in the vm_unbind call, we first do VA reclaim
(i915_gem_vm_bind_remove()) under the vm_bind_lock and destroy the
vma (i915_vma_destroy()) outside the vm_bind_lock (under the object
lock). The vma destruction in the vm_bind call error path is a bit
different, but I think it can be handled as well.
As mentioned in the other thread, doing VA reclaim
(i915_gem_vm_bind_remove()) early during VM destruction under the
vm_bind_lock, as you suggested, would fit in there nicely.

Niranjana

>/Thomas
>
>
>> <niranjana.vishwanathapura@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_gtt.c | 23 ++++++++++++++++++-----
>>  1 file changed, 18 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
>> b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> index 4ab3bda644ff..4f707d0eb3ef 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> @@ -109,7 +109,8 @@ int map_pt_dma_locked(struct i915_address_space
>> *vm, struct drm_i915_gem_object
>>         return 0;
>>  }
>>  
>> -static void clear_vm_list(struct list_head *list)
>> +static void clear_vm_list(struct list_head *list,
>> +                         struct list_head *destroy_list)
>>  {
>>         struct i915_vma *vma, *vn;
>>  
>> @@ -138,8 +139,7 @@ static void clear_vm_list(struct list_head *list)
>>                         i915_vm_resv_get(vma->vm);
>>                         vma->vm_ddestroy = true;
>>                 } else {
>> -                       i915_vma_destroy_locked(vma);
>> -                       i915_gem_object_put(obj);
>> +                       list_move_tail(&vma->vm_link, destroy_list);
>>                 }
>>  
>>         }
>> @@ -147,16 +147,29 @@ static void clear_vm_list(struct list_head
>> *list)
>>  
>>  static void __i915_vm_close(struct i915_address_space *vm)
>>  {
>> +       struct i915_vma *vma, *vn;
>> +       struct list_head list;
>> +
>> +       INIT_LIST_HEAD(&list);
>> +
>>         mutex_lock(&vm->mutex);
>>  
>> -       clear_vm_list(&vm->bound_list);
>> -       clear_vm_list(&vm->unbound_list);
>> +       clear_vm_list(&vm->bound_list, &list);
>> +       clear_vm_list(&vm->unbound_list, &list);
>>  
>>         /* Check for must-fix unanticipated side-effects */
>>         GEM_BUG_ON(!list_empty(&vm->bound_list));
>>         GEM_BUG_ON(!list_empty(&vm->unbound_list));
>>  
>>         mutex_unlock(&vm->mutex);
>> +
>> +       /* Destroy vmas outside vm->mutex */
>> +       list_for_each_entry_safe(vma, vn, &list, vm_link) {
>> +               struct drm_i915_gem_object *obj = vma->obj;
>> +
>> +               i915_vma_destroy(vma);
>> +               i915_gem_object_put(obj);
>> +       }
>>  }
>>  
>>  /* lock the vm into the current ww, if we lock one, we lock all */
>
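
A rough sketch of the vm_unbind ordering Niranjana describes in #3: VA reclaim first under the vm_bind_lock, vma destruction afterwards under only the object lock. The lookup helper, the parameters, and all signatures here are assumptions for illustration, not the series' actual code:

static int vm_unbind_va(struct i915_address_space *vm, u64 start)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	/* Step 1: VA reclaim under the outer vm_bind_lock. */
	mutex_lock(&vm->vm_bind_lock);
	vma = i915_gem_vm_bind_lookup_vma(vm, start);	/* assumed helper */
	if (vma)
		i915_gem_vm_bind_remove(vma);		/* delete from vm_bind_list */
	mutex_unlock(&vm->vm_bind_lock);

	if (!vma)
		return -ENOENT;

	/* Step 2: destroy outside vm_bind_lock, under the object lock. */
	obj = vma->obj;
	i915_gem_object_lock(obj, NULL);
	i915_vma_destroy(vma);
	i915_gem_object_unlock(obj);
	i915_gem_object_put(obj);

	return 0;
}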

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4ab3bda644ff..4f707d0eb3ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -109,7 +109,8 @@  int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
 	return 0;
 }
 
-static void clear_vm_list(struct list_head *list)
+static void clear_vm_list(struct list_head *list,
+			  struct list_head *destroy_list)
 {
 	struct i915_vma *vma, *vn;
 
@@ -138,8 +139,7 @@  static void clear_vm_list(struct list_head *list)
 			i915_vm_resv_get(vma->vm);
 			vma->vm_ddestroy = true;
 		} else {
-			i915_vma_destroy_locked(vma);
-			i915_gem_object_put(obj);
+			list_move_tail(&vma->vm_link, destroy_list);
 		}
 
 	}
@@ -147,16 +147,29 @@  static void clear_vm_list(struct list_head *list)
 
 static void __i915_vm_close(struct i915_address_space *vm)
 {
+	struct i915_vma *vma, *vn;
+	struct list_head list;
+
+	INIT_LIST_HEAD(&list);
+
 	mutex_lock(&vm->mutex);
 
-	clear_vm_list(&vm->bound_list);
-	clear_vm_list(&vm->unbound_list);
+	clear_vm_list(&vm->bound_list, &list);
+	clear_vm_list(&vm->unbound_list, &list);
 
 	/* Check for must-fix unanticipated side-effects */
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
 	GEM_BUG_ON(!list_empty(&vm->unbound_list));
 
 	mutex_unlock(&vm->mutex);
+
+	/* Destroy vmas outside vm->mutex */
+	list_for_each_entry_safe(vma, vn, &list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		i915_vma_destroy(vma);
+		i915_gem_object_put(obj);
+	}
 }
 
 /* lock the vm into the current ww, if we lock one, we lock all */