
[v7,20/20] drm/i915/vm_bind: Async vm_unbind support

Message ID 20221113075732.32100-21-niranjana.vishwanathapura@intel.com (mailing list archive)
State New, archived
Series drm/i915/vm_bind: Add VM_BIND functionality

Commit Message

Niranjana Vishwanathapura Nov. 13, 2022, 7:57 a.m. UTC
Asynchronously unbind the vma upon a vm_unbind call.
Fall back to synchronous unbind if the backend doesn't support
async unbind or if async unbind fails.

No need for vm_unbind out fence support as i915 will internally
handle all sequencing and the user need not try to sequence any
operation with the unbind completion.

v2: use i915_vma_destroy_async in vm_unbind ioctl

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 .../drm/i915/gem/i915_gem_vm_bind_object.c    |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               | 51 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h               |  1 +
 include/uapi/drm/i915_drm.h                   |  3 +-
 4 files changed, 51 insertions(+), 6 deletions(-)
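
As context, a minimal userspace sketch of the uapi this patch touches. Only
@vm_id is visible in the uapi hunk below; the ioctl name and the start/length
fields are assumptions based on the wider VM_BIND series, so treat this as
illustrative rather than the final uapi:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Unbind [start, start + length) from a VM. Fields other than @vm_id
     * are assumed from the series. No out fence is passed: per the commit
     * message, i915 sequences the async unbind internally. */
    static int vm_unbind(int drm_fd, uint32_t vm_id,
                         uint64_t start, uint64_t length)
    {
            struct drm_i915_gem_vm_unbind unbind = {
                    .vm_id = vm_id,
                    .start = start,
                    .length = length,
            };

            return ioctl(drm_fd, DRM_IOCTL_I915_GEM_VM_UNBIND, &unbind);
    }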

Comments

Matthew Auld Nov. 15, 2022, 11:05 a.m. UTC | #1
On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
> Asynchronously unbind the vma upon a vm_unbind call.
> Fall back to synchronous unbind if the backend doesn't support
> async unbind or if async unbind fails.
> 
> No need for vm_unbind out fence support as i915 will internally
> handle all sequencing and the user need not try to sequence any
> operation with the unbind completion.
> 
> v2: use i915_vma_destroy_async in vm_unbind ioctl
> 
> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>

This only does it for non-partial vma, right? Or was that changed somewhere?

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> ---
>   .../drm/i915/gem/i915_gem_vm_bind_object.c    |  2 +-
>   drivers/gpu/drm/i915/i915_vma.c               | 51 +++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h               |  1 +
>   include/uapi/drm/i915_drm.h                   |  3 +-
>   4 files changed, 51 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> index d87d1210365b..36651b447966 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
> @@ -210,7 +210,7 @@ static int i915_gem_vm_unbind_vma(struct i915_address_space *vm,
>   	 */
>   	obj = vma->obj;
>   	i915_gem_object_lock(obj, NULL);
> -	i915_vma_destroy(vma);
> +	i915_vma_destroy_async(vma);
>   	i915_gem_object_unlock(obj);
>   
>   	i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7cf77c67d755..483d25f2425c 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -42,6 +42,8 @@
>   #include "i915_vma.h"
>   #include "i915_vma_resource.h"
>   
> +static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma);
> +
>   static inline void assert_vma_held_evict(const struct i915_vma *vma)
>   {
>   	/*
> @@ -1713,7 +1715,7 @@ void i915_vma_reopen(struct i915_vma *vma)
>   	spin_unlock_irq(&gt->closed_lock);
>   }
>   
> -static void force_unbind(struct i915_vma *vma)
> +static void force_unbind(struct i915_vma *vma, bool async)
>   {
>   	if (!drm_mm_node_allocated(&vma->node))
>   		return;
> @@ -1727,7 +1729,21 @@ static void force_unbind(struct i915_vma *vma)
>   		i915_vma_set_purged(vma);
>   
>   	atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
> -	WARN_ON(__i915_vma_unbind(vma));
> +	if (async) {
> +		struct dma_fence *fence;
> +
> +		fence = __i915_vma_unbind_async(vma);
> +		if (IS_ERR_OR_NULL(fence)) {
> +			async = false;
> +		} else {
> +			dma_resv_add_fence(vma->obj->base.resv, fence,
> +					   DMA_RESV_USAGE_READ);
> +			dma_fence_put(fence);
> +		}
> +	}
> +
> +	if (!async)
> +		WARN_ON(__i915_vma_unbind(vma));
>   	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
>   }
>   
> @@ -1787,7 +1803,7 @@ void i915_vma_destroy_locked(struct i915_vma *vma)
>   {
>   	lockdep_assert_held(&vma->vm->mutex);
>   
> -	force_unbind(vma);
> +	force_unbind(vma, false);
>   	list_del_init(&vma->vm_link);
>   	release_references(vma, vma->vm->gt, false);
>   }
> @@ -1798,7 +1814,34 @@ void i915_vma_destroy(struct i915_vma *vma)
>   	bool vm_ddestroy;
>   
>   	mutex_lock(&vma->vm->mutex);
> -	force_unbind(vma);
> +	force_unbind(vma, false);
> +	list_del_init(&vma->vm_link);
> +	vm_ddestroy = vma->vm_ddestroy;
> +	vma->vm_ddestroy = false;
> +
> +	/* vma->vm may be freed when releasing vma->vm->mutex. */
> +	gt = vma->vm->gt;
> +	mutex_unlock(&vma->vm->mutex);
> +	release_references(vma, gt, vm_ddestroy);
> +}
> +
> +void i915_vma_destroy_async(struct i915_vma *vma)
> +{
> +	bool vm_ddestroy, async = vma->obj->mm.rsgt;
> +	struct intel_gt *gt;
> +
> +	if (dma_resv_reserve_fences(vma->obj->base.resv, 1))
> +		async = false;
> +
> +	mutex_lock(&vma->vm->mutex);
> +	/*
> +	 * Ensure any asynchronous binding is complete while using
> +	 * async unbind as we will be releasing the vma here.
> +	 */
> +	if (async && i915_active_wait(&vma->active))
> +		async = false;
> +
> +	force_unbind(vma, async);
>   	list_del_init(&vma->vm_link);
>   	vm_ddestroy = vma->vm_ddestroy;
>   	vma->vm_ddestroy = false;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 737ef310d046..25f15965dab8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -272,6 +272,7 @@ void i915_vma_reopen(struct i915_vma *vma);
>   
>   void i915_vma_destroy_locked(struct i915_vma *vma);
>   void i915_vma_destroy(struct i915_vma *vma);
> +void i915_vma_destroy_async(struct i915_vma *vma);
>   
>   #define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)
>   
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index e5600f358a15..431d40bb1dee 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3969,7 +3969,8 @@ struct drm_i915_gem_vm_bind {
>    * any error.
>    *
>    * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
> - * are not ordered.
> + * are not ordered. Furthermore, parts of the VM_UNBIND operation can be done
> + * asynchronously.
>    */
>   struct drm_i915_gem_vm_unbind {
>   	/** @vm_id: VM (address space) id to bind */
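
A note on the force_unbind() hunk above: because the unbind fence is installed
in the object's reservation with DMA_RESV_USAGE_READ, any subsequent waiter
that waits up to READ usage picks it up implicitly. A minimal sketch using the
standard dma-resv API (illustrative, not part of this patch):

    long ret;

    /* Waits on every fence in the object's reservation up to and
     * including READ usage, which now covers the async unbind fence
     * installed by force_unbind(). */
    ret = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ,
                                true /* interruptible */,
                                MAX_SCHEDULE_TIMEOUT);
    if (ret < 0)
            return ret;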
Andi Shyti Nov. 15, 2022, 3:58 p.m. UTC | #2
Hi Niranjana,

On Sat, Nov 12, 2022 at 11:57:32PM -0800, Niranjana Vishwanathapura wrote:
> [snip]
>
> -static void force_unbind(struct i915_vma *vma)
> +static void force_unbind(struct i915_vma *vma, bool async)

I still like the defines on this; they look cleaner, but it's a
matter of taste.

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Andi
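
For concreteness, the alternative Andi is suggesting would look roughly like
this (macro names are illustrative, not from the series); the gain is a
self-documenting call site instead of a bare bool:

    #define I915_VMA_UNBIND_SYNC    false
    #define I915_VMA_UNBIND_ASYNC   true

    force_unbind(vma, I915_VMA_UNBIND_ASYNC);  /* vs. force_unbind(vma, true) */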
Niranjana Vishwanathapura Nov. 15, 2022, 4:15 p.m. UTC | #3
On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>[snip]
>
>This only does it for non-partial vma, right? Or was that changed somewhere?
>

No, it applies to any vma (partial or non-partial).
It was so from the beginning.

Niranjana

Niranjana Vishwanathapura Nov. 15, 2022, 4:20 p.m. UTC | #4
On Tue, Nov 15, 2022 at 04:58:42PM +0100, Andi Shyti wrote:
>Hi Niranjana,
>
>On Sat, Nov 12, 2022 at 11:57:32PM -0800, Niranjana Vishwanathapura wrote:
>> [snip]
>>
>> -static void force_unbind(struct i915_vma *vma)
>> +static void force_unbind(struct i915_vma *vma, bool async)
>
>I still like the defines on this, they look cleaner, but it's a
>matter of taste.
>

Ok, will change.

Niranjana

>Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
>
>Andi
Matthew Auld Nov. 15, 2022, 4:20 p.m. UTC | #5
On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
> On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>> On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>> [snip]
>>
>> This only does it for non-partial vma, right? Or was that changed 
>> somewhere?
>>
> 
> No, it applies to any vma (partial or non-partial).
> It was so from the beginning.

Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
vma->pages? IIRC this was discussed before. Just trying to think about 
the consequences of this change.

Niranjana Vishwanathapura Nov. 15, 2022, 4:33 p.m. UTC | #6
On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>[snip]
>>>
>>>This only does it for non-partial vma, right? Or was that changed 
>>>somewhere?
>>>
>>
>>No, it applies to any vma (partial or non-partial).
>>It was so from the beginning.
>
>Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
>vma->pages? IIRC this was discussed before. Just trying to think about 
>the consequences of this change.

I am not seeing any such restriction. Let me probe and check if there
is any such restriction anywhere in the call chain.

Niranjana

Niranjana Vishwanathapura Nov. 15, 2022, 11:15 p.m. UTC | #7
On Tue, Nov 15, 2022 at 08:33:47AM -0800, Niranjana Vishwanathapura wrote:
>On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>>On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>>On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>>On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>>[snip]
>>>>
>>>>This only does it for non-partial vma, right? Or was that 
>>>>changed somewhere?
>>>>
>>>
>>>No, it applies to any vma (partial or non-partial).
>>>It was so from the beginning.
>>
>>Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
>>vma->pages? IIRC this was discussed before. Just trying to think 
>>about the consequences of this change.
>
>I am not seeing any such restriction. Let me probe and check if there
>is any such restriction anywhere in the call chain.

I checked and I am not seeing any restriction anywhere in the call chain.

Niranjana

Niranjana Vishwanathapura Nov. 16, 2022, 12:37 a.m. UTC | #8
On Tue, Nov 15, 2022 at 03:15:03PM -0800, Niranjana Vishwanathapura wrote:
>On Tue, Nov 15, 2022 at 08:33:47AM -0800, Niranjana Vishwanathapura wrote:
>>On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>>>On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>>>On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>>>On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>>>[snip]
>>>>>
>>>>>This only does it for non-partial vma, right? Or was that 
>>>>>changed somewhere?
>>>>>
>>>>
>>>>No, it applies to any vma (partial or non-partial).
>>>>It was so from the beginning.
>>>
>>>Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
>>>vma->pages? IIRC this was discussed before. Just trying to think 
>>>about the consequences of this change.
>>
>>I am not seeing any such restriction. Let me probe and check if there
>>is any such restriction anywhere in the call chain.
>
>I checked and I am not seeing any restriction anywhere in the call chain.
>

Note that, just like the binding case, unbinding is also synchronous unless
there is pending activity, in which case it will be asynchronous.

Niranjana
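
To make the note above concrete, a rough sketch of the idea (paraphrased, not
verbatim driver code): the returned unbind fence only stays pending when the
vma had activity in flight, so an idle vma unbinds effectively synchronously.

    struct dma_fence *fence = __i915_vma_unbind_async(vma);
    bool completed;

    /* With no pending activity the fence has no dependencies and can
     * signal immediately, i.e. the unbind was effectively synchronous;
     * otherwise it signals once the pending work and the unbind itself
     * complete. */
    completed = !IS_ERR_OR_NULL(fence) && dma_fence_is_signaled(fence);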

Matthew Auld Nov. 23, 2022, 11:42 a.m. UTC | #9
On 16/11/2022 00:37, Niranjana Vishwanathapura wrote:
> On Tue, Nov 15, 2022 at 03:15:03PM -0800, Niranjana Vishwanathapura wrote:
>> On Tue, Nov 15, 2022 at 08:33:47AM -0800, Niranjana Vishwanathapura 
>> wrote:
>>> On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>>>> On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>>>> On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>>>> On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>>>> [snip]
>>>>>>
>>>>>> This only does it for non-partial vma, right? Or was that changed 
>>>>>> somewhere?
>>>>>>
>>>>>
>>>>> No, it applies to any vma (partial or non-partial).
>>>>> It was so from the beginning.
>>>>
>>>> Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
>>>> vma->pages? IIRC this was discussed before. Just trying to think 
>>>> about the consequences of this change.
>>>
>>> I am not seeing any such restriction. Let me probe and check if there
>>> is any such restriction anywhere in the call chain.
>>
>> I checked and I am not seeing any restriction anywhere in the call chain.
>>
> 
> Note that, just like the binding case, unbinding is also synchronous unless
> there is pending activity, in which case it will be asynchronous.

In __i915_vma_unbind_async() there is:

if (i915_vma_is_pinned(vma) ||
    &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
        return ERR_PTR(-EAGAIN);

AFAICT we then also don't get any pipelined moves with such an object, 
if there is such a binding present.

Niranjana Vishwanathapura Nov. 29, 2022, 11:26 p.m. UTC | #10
On Wed, Nov 23, 2022 at 11:42:58AM +0000, Matthew Auld wrote:
>[snip]
>
>In __i915_vma_unbind_async() there is:
>
>if (i915_vma_is_pinned(vma) ||
>    &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
>        return ERR_PTR(-EAGAIN);
>
>AFAICT we then also don't get any pipelined moves with such an object, 
>if there is such a binding present.

I had to remove this check as otherwise, for VM_BIND (persistent) mappings,
it will always be true and we will always end up with -EAGAIN.
Persistent mappings, as they support partial binding, can't have an sg
table which is just a pointer to the object's sg table.

Niranjana
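
To spell out the reasoning, here is the removed check again with the failure
mode annotated (a restatement of the thread above, not new driver code):

    /* A persistent (VM_BIND) vma builds its own sg table so that it
     * can describe a sub-range of the object; the pointer comparison
     * below therefore never holds for persistent mappings, and async
     * unbind would always have bailed with -EAGAIN. */
    if (i915_vma_is_pinned(vma) ||
        &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
            return ERR_PTR(-EAGAIN);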

Matthew Auld Dec. 1, 2022, 10:10 a.m. UTC | #11
On 29/11/2022 23:26, Niranjana Vishwanathapura wrote:
> On Wed, Nov 23, 2022 at 11:42:58AM +0000, Matthew Auld wrote:
>> On 16/11/2022 00:37, Niranjana Vishwanathapura wrote:
>>> On Tue, Nov 15, 2022 at 03:15:03PM -0800, Niranjana Vishwanathapura 
>>> wrote:
>>>> On Tue, Nov 15, 2022 at 08:33:47AM -0800, Niranjana Vishwanathapura 
>>>> wrote:
>>>>> On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>>>>>> On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>>>>>> On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>>>>>> On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>>>>>> Asynchronously unbind the vma upon vm_unbind call.
>>>>>>>>> Fall back to synchronous unbind if backend doesn't support
>>>>>>>>> async unbind or if async unbind fails.
>>>>>>>>>
>>>>>>>>> No need for vm_unbind out fence support as i915 will internally
>>>>>>>>> handle all sequencing and user need not try to sequence any
>>>>>>>>> operation with the unbind completion.
>>>>>>>>>
>>>>>>>>> v2: use i915_vma_destroy_async in vm_unbind ioctl
>>>>>>>>>
>>>>>>>>> Signed-off-by: Niranjana Vishwanathapura 
>>>>>>>>> <niranjana.vishwanathapura@intel.com>
>>>>>>>>
>>>>>>>> This only does it for non-partial vma, right? Or was that 
>>>>>>>> changed somewhere?
>>>>>>>>
>>>>>>>
>>>>>>> No, it applies to any vma (partial or non-partial).
>>>>>>> It was so from the beginning.
>>>>>>
>>>>>> Doesn't __i915_vma_unbind_async() return an error when mm.pages != 
>>>>>> vma->pages? IIRC this was discussed before. Just trying to think 
>>>>>> about the consequences of this change.
>>>>>
>>>>> I am not seeing any such restriction. Let me probe and check if there
>>>>> is any such restriction anywhere in the call chain.
>>>>
>>>> I checked and I am not seeing any restriction anywhere in the call
>>>> chain.
>>>>
>>>
>>> Note that, just like the binding case, unbinding is also synchronous unless
>>> there is pending activity, in which case it will be asynchronous.
>>
>> In __i915_vma_unbind_async() there is:
>>
>> if (i915_vma_is_pinned(vma) ||
>>    &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
>>        return ERR_PTR(-EAGAIN);
>>
>> AFAICT we then also don't get any pipelined moves with such an object, 
>> if there is such a binding present.
> 
> I had to remove this check as otherwise, for VM_BIND (persistent) mappings,
> it will always be true and we will always end up with -EAGAIN.
> Persistent mappings, as they support partial binding, can't have an sg
> table which is just a pointer to the object's sg table.

Ok, maybe make that a separate patch, since it seems to change the 
behaviour for more than just persistent mappings, AFAICT.

[snip]
Niranjana Vishwanathapura Dec. 1, 2022, 3:11 p.m. UTC | #12
On Thu, Dec 01, 2022 at 10:10:14AM +0000, Matthew Auld wrote:
>On 29/11/2022 23:26, Niranjana Vishwanathapura wrote:
>>On Wed, Nov 23, 2022 at 11:42:58AM +0000, Matthew Auld wrote:
>>>On 16/11/2022 00:37, Niranjana Vishwanathapura wrote:
>>>>On Tue, Nov 15, 2022 at 03:15:03PM -0800, Niranjana 
>>>>Vishwanathapura wrote:
>>>>>On Tue, Nov 15, 2022 at 08:33:47AM -0800, Niranjana 
>>>>>Vishwanathapura wrote:
>>>>>>On Tue, Nov 15, 2022 at 04:20:54PM +0000, Matthew Auld wrote:
>>>>>>>On 15/11/2022 16:15, Niranjana Vishwanathapura wrote:
>>>>>>>>On Tue, Nov 15, 2022 at 11:05:21AM +0000, Matthew Auld wrote:
>>>>>>>>>On 13/11/2022 07:57, Niranjana Vishwanathapura wrote:
>>>>>>>>>>Asynchronously unbind the vma upon vm_unbind call.
>>>>>>>>>>Fall back to synchronous unbind if backend doesn't support
>>>>>>>>>>async unbind or if async unbind fails.
>>>>>>>>>>
>>>>>>>>>>No need for vm_unbind out fence support as i915 will internally
>>>>>>>>>>handle all sequencing and user need not try to sequence any
>>>>>>>>>>operation with the unbind completion.
>>>>>>>>>>
>>>>>>>>>>v2: use i915_vma_destroy_async in vm_unbind ioctl
>>>>>>>>>>
>>>>>>>>>>Signed-off-by: Niranjana Vishwanathapura 
>>>>>>>>>><niranjana.vishwanathapura@intel.com>
>>>>>>>>>
>>>>>>>>>This only does it for non-partial vma, right? Or was 
>>>>>>>>>that changed somewhere?
>>>>>>>>>
>>>>>>>>
>>>>>>>>No, it applies to any vma (partial or non-partial).
>>>>>>>>It was so from the beginning.
>>>>>>>
>>>>>>>Doesn't __i915_vma_unbind_async() return an error when 
>>>>>>>mm.pages != vma->pages? IIRC this was discussed before. 
>>>>>>>Just trying to think about the consequences of this 
>>>>>>>change.
>>>>>>
>>>>>>I am not seeing any such restriction. Let me probe and check if there
>>>>>>is any such restriction anywhere in the call chain.
>>>>>
>>>>>I checked and I am not seeing any restriction anywhere in the
>>>>>call chain.
>>>>>
>>>>
>>>>Note that, just like the binding case, unbinding is also synchronous unless
>>>>there is pending activity, in which case it will be asynchronous.
>>>
>>>In __i915_vma_unbind_async() there is:
>>>
>>>if (i915_vma_is_pinned(vma) ||
>>>   &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
>>>       return ERR_PTR(-EAGAIN);
>>>
>>>AFAICT we then also don't get any pipelined moves with such an 
>>>object, if there is such a binding present.
>>
>>I had to remove this check as otherwise, for VM_BIND (persistent) mappings,
>>it will always be true and we will always end up with -EAGAIN.
>>Persistent mappings, as they support partial binding, can't have an sg
>>table which is just a pointer to the object's sg table.
>
>Ok, maybe make that a separate patch, since it seems to change the 
>behaviour for more than just persistent mappings, AFAICT.
>

Ok, will do.

Niranjana

[snip]

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index d87d1210365b..36651b447966 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -210,7 +210,7 @@  static int i915_gem_vm_unbind_vma(struct i915_address_space *vm,
 	 */
 	obj = vma->obj;
 	i915_gem_object_lock(obj, NULL);
-	i915_vma_destroy(vma);
+	i915_vma_destroy_async(vma);
 	i915_gem_object_unlock(obj);
 
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7cf77c67d755..483d25f2425c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -42,6 +42,8 @@ 
 #include "i915_vma.h"
 #include "i915_vma_resource.h"
 
+static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma);
+
 static inline void assert_vma_held_evict(const struct i915_vma *vma)
 {
 	/*
@@ -1713,7 +1715,7 @@  void i915_vma_reopen(struct i915_vma *vma)
 	spin_unlock_irq(&gt->closed_lock);
 }
 
-static void force_unbind(struct i915_vma *vma)
+static void force_unbind(struct i915_vma *vma, bool async)
 {
 	if (!drm_mm_node_allocated(&vma->node))
 		return;
@@ -1727,7 +1729,21 @@  static void force_unbind(struct i915_vma *vma)
 		i915_vma_set_purged(vma);
 
 	atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-	WARN_ON(__i915_vma_unbind(vma));
+	if (async) {
+		struct dma_fence *fence;
+
+		fence = __i915_vma_unbind_async(vma);
+		if (IS_ERR_OR_NULL(fence)) {
+			async = false;
+		} else {
+			dma_resv_add_fence(vma->obj->base.resv, fence,
+					   DMA_RESV_USAGE_READ);
+			dma_fence_put(fence);
+		}
+	}
+
+	if (!async)
+		WARN_ON(__i915_vma_unbind(vma));
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 }
 
@@ -1787,7 +1803,7 @@  void i915_vma_destroy_locked(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->mutex);
 
-	force_unbind(vma);
+	force_unbind(vma, false);
 	list_del_init(&vma->vm_link);
 	release_references(vma, vma->vm->gt, false);
 }
@@ -1798,7 +1814,34 @@  void i915_vma_destroy(struct i915_vma *vma)
 	bool vm_ddestroy;
 
 	mutex_lock(&vma->vm->mutex);
-	force_unbind(vma);
+	force_unbind(vma, false);
+	list_del_init(&vma->vm_link);
+	vm_ddestroy = vma->vm_ddestroy;
+	vma->vm_ddestroy = false;
+
+	/* vma->vm may be freed when releasing vma->vm->mutex. */
+	gt = vma->vm->gt;
+	mutex_unlock(&vma->vm->mutex);
+	release_references(vma, gt, vm_ddestroy);
+}
+
+void i915_vma_destroy_async(struct i915_vma *vma)
+{
+	bool vm_ddestroy, async = vma->obj->mm.rsgt;
+	struct intel_gt *gt;
+
+	if (dma_resv_reserve_fences(vma->obj->base.resv, 1))
+		async = false;
+
+	mutex_lock(&vma->vm->mutex);
+	/*
+	 * Ensure any asynchronous binding is complete while using
+	 * async unbind as we will be releasing the vma here.
+	 */
+	if (async && i915_active_wait(&vma->active))
+		async = false;
+
+	force_unbind(vma, async);
 	list_del_init(&vma->vm_link);
 	vm_ddestroy = vma->vm_ddestroy;
 	vma->vm_ddestroy = false;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 737ef310d046..25f15965dab8 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -272,6 +272,7 @@  void i915_vma_reopen(struct i915_vma *vma);
 
 void i915_vma_destroy_locked(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
+void i915_vma_destroy_async(struct i915_vma *vma);
 
 #define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e5600f358a15..431d40bb1dee 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3969,7 +3969,8 @@  struct drm_i915_gem_vm_bind {
  * any error.
  *
  * VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
- * are not ordered.
+ * are not ordered. Furthermore, parts of the VM_UNBIND operation can be done
+ * asynchronously.
  */
 struct drm_i915_gem_vm_unbind {
 	/** @vm_id: VM (address space) id to bind */
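
A hypothetical userspace sketch of the resulting VM_UNBIND semantics follows.
The DRM_IOCTL_I915_GEM_VM_UNBIND macro and the start/length fields are taken
from earlier patches in this series rather than from this one, so treat them
as assumptions; only @vm_id appears in the hunk above:

#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Unbind the mapping at [start, start + length) in the given VM. */
static int vm_unbind(int fd, __u32 vm_id, __u64 start, __u64 length)
{
	struct drm_i915_gem_vm_unbind unbind;

	memset(&unbind, 0, sizeof(unbind));
	unbind.vm_id = vm_id;
	unbind.start = start;	/* assumed field name */
	unbind.length = length;	/* assumed field name */

	/*
	 * No out-fence is needed: the ioctl may return while the actual
	 * page-table teardown is still in flight, but i915 sequences the
	 * unbind internally, so userspace need not order anything against
	 * its completion.
	 */
	return ioctl(fd, DRM_IOCTL_I915_GEM_VM_UNBIND, &unbind);
}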