diff mbox series

[3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.

Message ID 20231031134059.171277-4-ishitatsuyuki@gmail.com (mailing list archive)
State New, archived
Headers show
Series drm/amdgpu: Add flag to disable implicit sync for GEM operations. | expand

Commit Message

Tatsuyuki Ishi Oct. 31, 2023, 1:40 p.m. UTC
The current amdgpu_gem_va_update_vm only tries to perform updates for the
BO specified in the GEM ioctl; however, when a binding is split, the
adjacent bindings also need to be updated. Such updates currently end up
getting deferred until the next submission, which causes stalls.

Introduce a new state "dirty", shared between per-VM BOs and traditional
BOs, containing all BOs that have pending updates in `invalids`.
amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
in the dirty state.

Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
 3 files changed, 63 insertions(+), 24 deletions(-)

Comments

Christian König Oct. 31, 2023, 1:57 p.m. UTC | #1
Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> The current amdgpu_gem_va_update_vm only tries to perform updates for the
> BO specified in the GEM ioctl; however, when a binding is split, the
> adjacent bindings also need to be updated. Such updates currently ends up
> getting deferred until next submission which causes stalls.

Yeah, that is a necessity. The hardware simply doesn't support what you 
try to do here in all cases.

So this approach won't work in general.

Regards,
Christian.

>
> Introduce a new state "dirty", shared between per-VM BOs and traditional
> BOs, containing all BOs that have pending updates in `invalids`.
> amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
> in the dirty state.
>
> Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>   3 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index a1b15d0d6c48..01d3a97248b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>    * vital here, so they are not reported back to userspace.
>    */
>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> -				    struct amdgpu_vm *vm,
> -				    struct amdgpu_bo_va *bo_va,
> -				    uint32_t operation)
> +				    struct amdgpu_vm *vm)
>   {
> +	struct amdgpu_bo_va *bo_va;
>   	int r;
>   
>   	if (!amdgpu_vm_ready(vm))
> @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   	if (r)
>   		goto error;
>   
> -	if (operation == AMDGPU_VA_OP_MAP ||
> -	    operation == AMDGPU_VA_OP_REPLACE) {
> +	spin_lock(&vm->status_lock);
> +	while (!list_empty(&vm->dirty)) {
> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> +					 base.vm_status);
> +		spin_unlock(&vm->status_lock);
> +
>   		r = amdgpu_vm_bo_update(adev, bo_va, false);
>   		if (r)
>   			goto error;
> +		spin_lock(&vm->status_lock);
>   	}
> +	spin_unlock(&vm->status_lock);
>   
>   	r = amdgpu_vm_update_pdes(adev, vm, false);
>   
> @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   		break;
>   	}
>   	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> -		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> -					args->operation);
> +		amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>   
>   error:
>   	drm_exec_fini(&exec);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index dd6f72e2a1d6..01d31891cd05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>   	spin_unlock(&vm_bo->vm->status_lock);
>   }
>   
> +/**
> + * amdgpu_vm_bo_dirty - vm_bo is dirty
> + *
> + * @vm_bo: vm_bo which is dirty
> + *
> + * State for normal and per VM BOs that are not moved, but have new entries in
> + * bo_va->invalids.
> + */
> +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
> +{
> +	spin_lock(&vm_bo->vm->status_lock);
> +	list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
> +	spin_unlock(&vm_bo->vm->status_lock);
> +}
> +
>   /**
>    * amdgpu_vm_bo_moved - vm_bo is moved
>    *
> @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>   	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>   		amdgpu_vm_bo_get_memory(bo_va, stats);
>   
> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
> +		amdgpu_vm_bo_get_memory(bo_va, stats);
> +
>   	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>   		amdgpu_vm_bo_get_memory(bo_va, stats);
>   
> @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>   			dma_resv_unlock(resv);
>   		spin_lock(&vm->status_lock);
>   	}
> +
> +	while (!list_empty(&vm->dirty)) {
> +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> +					 base.vm_status);
> +		spin_unlock(&vm->status_lock);
> +
> +		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		if (r)
> +			return r;
> +		spin_lock(&vm->status_lock);
> +	}
>   	spin_unlock(&vm->status_lock);
>   
>   	return 0;
> @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>   				    struct amdgpu_bo_va_mapping *mapping)
>   {
>   	struct amdgpu_vm *vm = bo_va->base.vm;
> -	struct amdgpu_bo *bo = bo_va->base.bo;
>   
>   	mapping->bo_va = bo_va;
>   	list_add(&mapping->list, &bo_va->invalids);
>   	amdgpu_vm_it_insert(mapping, &vm->va);
> +	if (!bo_va->base.moved)
> +		amdgpu_vm_bo_dirty(&bo_va->base);
>   
>   	if (mapping->flags & AMDGPU_PTE_PRT)
>   		amdgpu_vm_prt_get(adev);
>   
> -	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -	    !bo_va->base.moved) {
> -		amdgpu_vm_bo_moved(&bo_va->base);
> -	}
>   	trace_amdgpu_vm_bo_map(bo_va, mapping);
>   }
>   
> @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   			before->flags = tmp->flags;
>   			before->bo_va = tmp->bo_va;
>   			list_add(&before->list, &tmp->bo_va->invalids);
> +			if (!tmp->bo_va->base.moved)
> +				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>   		}
>   
>   		/* Remember mapping split at the end */
> @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   			after->flags = tmp->flags;
>   			after->bo_va = tmp->bo_va;
>   			list_add(&after->list, &tmp->bo_va->invalids);
> +			if (!tmp->bo_va->base.moved)
> +				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>   		}
>   
>   		list_del(&tmp->list);
> @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>   
>   	/* Insert partial mapping before the range */
>   	if (!list_empty(&before->list)) {
> -		struct amdgpu_bo *bo = before->bo_va->base.bo;
> -
>   		amdgpu_vm_it_insert(before, &vm->va);
>   		if (before->flags & AMDGPU_PTE_PRT)
>   			amdgpu_vm_prt_get(adev);
> -
> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -		    !before->bo_va->base.moved)
> -			amdgpu_vm_bo_moved(&before->bo_va->base);
>   	} else {
>   		kfree(before);
>   	}
>   
>   	/* Insert partial mapping after the range */
>   	if (!list_empty(&after->list)) {
> -		struct amdgpu_bo *bo = after->bo_va->base.bo;
> -
>   		amdgpu_vm_it_insert(after, &vm->va);
>   		if (after->flags & AMDGPU_PTE_PRT)
>   			amdgpu_vm_prt_get(adev);
> -
> -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> -		    !after->bo_va->base.moved)
> -			amdgpu_vm_bo_moved(&after->bo_va->base);
>   	} else {
>   		kfree(after);
>   	}
> @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>   	INIT_LIST_HEAD(&vm->evicted);
>   	INIT_LIST_HEAD(&vm->relocated);
>   	INIT_LIST_HEAD(&vm->moved);
> +	INIT_LIST_HEAD(&vm->dirty);
>   	INIT_LIST_HEAD(&vm->idle);
>   	INIT_LIST_HEAD(&vm->invalidated);
>   	spin_lock_init(&vm->status_lock);
> @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   {
>   	struct amdgpu_bo_va *bo_va, *tmp;
>   	u64 total_idle = 0;
> +	u64 total_dirty = 0;
>   	u64 total_relocated = 0;
>   	u64 total_moved = 0;
>   	u64 total_invalidated = 0;
>   	u64 total_done = 0;
>   	unsigned int total_idle_objs = 0;
> +	unsigned int total_dirty_objs = 0;
>   	unsigned int total_relocated_objs = 0;
>   	unsigned int total_moved_objs = 0;
>   	unsigned int total_invalidated_objs = 0;
> @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   	total_idle_objs = id;
>   	id = 0;
>   
> +	seq_puts(m, "\tDirty BOs:\n");
> +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
> +		if (!bo_va->base.bo)
> +			continue;
> +		total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> +	}
> +	total_dirty_objs = id;
> +	id = 0;
> +
>   	seq_puts(m, "\tRelocated BOs:\n");
>   	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>   		if (!bo_va->base.bo)
> @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>   
>   	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>   		   total_idle_objs);
> +	seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
> +		   total_dirty_objs);
>   	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>   		   total_relocated_objs);
>   	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d9ab97eabda9..f91d4fcf80b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -276,6 +276,9 @@ struct amdgpu_vm {
>   	/* per VM BOs moved, but not yet updated in the PT */
>   	struct list_head	moved;
>   
> +	/* normal and per VM BOs that are not moved, but have new PT entries */
> +	struct list_head	dirty;
> +
>   	/* All BOs of this VM not currently in the state machine */
>   	struct list_head	idle;
>
Bas Nieuwenhuizen Oct. 31, 2023, 1:59 p.m. UTC | #2
On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koenig@amd.com>
wrote:

> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
> > BO specified in the GEM ioctl; however, when a binding is split, the
> > adjacent bindings also need to be updated. Such updates currently ends up
> > getting deferred until next submission which causes stalls.
>
> Yeah, that is a necessity. The hardware simply doesn't support what you
> try to do here in all cases.
>

What can the hardware not do here? Is this just needing to wait for TLB
flushes before we can free pagetables, can we just delay that?


>
> So this approach won't work in general.
>
> Regards,
> Christian.
>
> >
> > Introduce a new state "dirty", shared between per-VM BOs and traditional
> > BOs, containing all BOs that have pending updates in `invalids`.
> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
> > in the dirty state.
> >
> > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
> >   3 files changed, 63 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > index a1b15d0d6c48..01d3a97248b0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device
> *dev, void *data,
> >    * vital here, so they are not reported back to userspace.
> >    */
> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> > -                                 struct amdgpu_vm *vm,
> > -                                 struct amdgpu_bo_va *bo_va,
> > -                                 uint32_t operation)
> > +                                 struct amdgpu_vm *vm)
> >   {
> > +     struct amdgpu_bo_va *bo_va;
> >       int r;
> >
> >       if (!amdgpu_vm_ready(vm))
> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
> amdgpu_device *adev,
> >       if (r)
> >               goto error;
> >
> > -     if (operation == AMDGPU_VA_OP_MAP ||
> > -         operation == AMDGPU_VA_OP_REPLACE) {
> > +     spin_lock(&vm->status_lock);
> > +     while (!list_empty(&vm->dirty)) {
> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> > +                                      base.vm_status);
> > +             spin_unlock(&vm->status_lock);
> > +
> >               r = amdgpu_vm_bo_update(adev, bo_va, false);
> >               if (r)
> >                       goto error;
> > +             spin_lock(&vm->status_lock);
> >       }
> > +     spin_unlock(&vm->status_lock);
> >
> >       r = amdgpu_vm_update_pdes(adev, vm, false);
> >
> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
> *data,
> >               break;
> >       }
> >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
> !amdgpu_vm_debug)
> > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > -                                     args->operation);
> > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
> >
> >   error:
> >       drm_exec_fini(&exec);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index dd6f72e2a1d6..01d31891cd05 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
> amdgpu_vm_bo_base *vm_bo, bool evict
> >       spin_unlock(&vm_bo->vm->status_lock);
> >   }
> >
> > +/**
> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
> > + *
> > + * @vm_bo: vm_bo which is dirty
> > + *
> > + * State for normal and per VM BOs that are not moved, but have new
> entries in
> > + * bo_va->invalids.
> > + */
> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
> > +{
> > +     spin_lock(&vm_bo->vm->status_lock);
> > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
> > +     spin_unlock(&vm_bo->vm->status_lock);
> > +}
> > +
> >   /**
> >    * amdgpu_vm_bo_moved - vm_bo is moved
> >    *
> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
> >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
> base.eviction_status)
> >               amdgpu_vm_bo_get_memory(bo_va, stats);
> >
> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
> > +             amdgpu_vm_bo_get_memory(bo_va, stats);
> > +
> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
> base.vm_status)
> >               amdgpu_vm_bo_get_memory(bo_va, stats);
> >
> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device
> *adev,
> >                       dma_resv_unlock(resv);
> >               spin_lock(&vm->status_lock);
> >       }
> > +
> > +     while (!list_empty(&vm->dirty)) {
> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
> > +                                      base.vm_status);
> > +             spin_unlock(&vm->status_lock);
> > +
> > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
> > +             if (r)
> > +                     return r;
> > +             spin_lock(&vm->status_lock);
> > +     }
> >       spin_unlock(&vm->status_lock);
> >
> >       return 0;
> > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct
> amdgpu_device *adev,
> >                                   struct amdgpu_bo_va_mapping *mapping)
> >   {
> >       struct amdgpu_vm *vm = bo_va->base.vm;
> > -     struct amdgpu_bo *bo = bo_va->base.bo;
> >
> >       mapping->bo_va = bo_va;
> >       list_add(&mapping->list, &bo_va->invalids);
> >       amdgpu_vm_it_insert(mapping, &vm->va);
> > +     if (!bo_va->base.moved)
> > +             amdgpu_vm_bo_dirty(&bo_va->base);
> >
> >       if (mapping->flags & AMDGPU_PTE_PRT)
> >               amdgpu_vm_prt_get(adev);
> >
> > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> > -         !bo_va->base.moved) {
> > -             amdgpu_vm_bo_moved(&bo_va->base);
> > -     }
> >       trace_amdgpu_vm_bo_map(bo_va, mapping);
> >   }
> >
> > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
> amdgpu_device *adev,
> >                       before->flags = tmp->flags;
> >                       before->bo_va = tmp->bo_va;
> >                       list_add(&before->list, &tmp->bo_va->invalids);
> > +                     if (!tmp->bo_va->base.moved)
> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
> >               }
> >
> >               /* Remember mapping split at the end */
> > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
> amdgpu_device *adev,
> >                       after->flags = tmp->flags;
> >                       after->bo_va = tmp->bo_va;
> >                       list_add(&after->list, &tmp->bo_va->invalids);
> > +                     if (!tmp->bo_va->base.moved)
> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
> >               }
> >
> >               list_del(&tmp->list);
> > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
> amdgpu_device *adev,
> >
> >       /* Insert partial mapping before the range */
> >       if (!list_empty(&before->list)) {
> > -             struct amdgpu_bo *bo = before->bo_va->base.bo;
> > -
> >               amdgpu_vm_it_insert(before, &vm->va);
> >               if (before->flags & AMDGPU_PTE_PRT)
> >                       amdgpu_vm_prt_get(adev);
> > -
> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv
> &&
> > -                 !before->bo_va->base.moved)
> > -                     amdgpu_vm_bo_moved(&before->bo_va->base);
> >       } else {
> >               kfree(before);
> >       }
> >
> >       /* Insert partial mapping after the range */
> >       if (!list_empty(&after->list)) {
> > -             struct amdgpu_bo *bo = after->bo_va->base.bo;
> > -
> >               amdgpu_vm_it_insert(after, &vm->va);
> >               if (after->flags & AMDGPU_PTE_PRT)
> >                       amdgpu_vm_prt_get(adev);
> > -
> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv
> &&
> > -                 !after->bo_va->base.moved)
> > -                     amdgpu_vm_bo_moved(&after->bo_va->base);
> >       } else {
> >               kfree(after);
> >       }
> > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
> struct amdgpu_vm *vm, int32_t xcp
> >       INIT_LIST_HEAD(&vm->evicted);
> >       INIT_LIST_HEAD(&vm->relocated);
> >       INIT_LIST_HEAD(&vm->moved);
> > +     INIT_LIST_HEAD(&vm->dirty);
> >       INIT_LIST_HEAD(&vm->idle);
> >       INIT_LIST_HEAD(&vm->invalidated);
> >       spin_lock_init(&vm->status_lock);
> > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
> *vm, struct seq_file *m)
> >   {
> >       struct amdgpu_bo_va *bo_va, *tmp;
> >       u64 total_idle = 0;
> > +     u64 total_dirty = 0;
> >       u64 total_relocated = 0;
> >       u64 total_moved = 0;
> >       u64 total_invalidated = 0;
> >       u64 total_done = 0;
> >       unsigned int total_idle_objs = 0;
> > +     unsigned int total_dirty_objs = 0;
> >       unsigned int total_relocated_objs = 0;
> >       unsigned int total_moved_objs = 0;
> >       unsigned int total_invalidated_objs = 0;
> > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
> *vm, struct seq_file *m)
> >       total_idle_objs = id;
> >       id = 0;
> >
> > +     seq_puts(m, "\tDirty BOs:\n");
> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
> > +             if (!bo_va->base.bo)
> > +                     continue;
> > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo,
> m);
> > +     }
> > +     total_dirty_objs = id;
> > +     id = 0;
> > +
> >       seq_puts(m, "\tRelocated BOs:\n");
> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
> base.vm_status) {
> >               if (!bo_va->base.bo)
> > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
> *vm, struct seq_file *m)
> >
> >       seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n",
> total_idle,
> >                  total_idle_objs);
> > +     seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n",
> total_dirty,
> > +                total_dirty_objs);
> >       seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n",
> total_relocated,
> >                  total_relocated_objs);
> >       seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n",
> total_moved,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index d9ab97eabda9..f91d4fcf80b8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -276,6 +276,9 @@ struct amdgpu_vm {
> >       /* per VM BOs moved, but not yet updated in the PT */
> >       struct list_head        moved;
> >
> > +     /* normal and per VM BOs that are not moved, but have new PT
> entries */
> > +     struct list_head        dirty;
> > +
> >       /* All BOs of this VM not currently in the state machine */
> >       struct list_head        idle;
> >
>
>
Christian König Oct. 31, 2023, 2:07 p.m. UTC | #3
Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>
>
> On Tue, Oct 31, 2023 at 2:57 PM Christian König 
> <christian.koenig@amd.com> wrote:
>
>     Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>     > The current amdgpu_gem_va_update_vm only tries to perform
>     updates for the
>     > BO specified in the GEM ioctl; however, when a binding is split, the
>     > adjacent bindings also need to be updated. Such updates
>     currently ends up
>     > getting deferred until next submission which causes stalls.
>
>     Yeah, that is a necessity. The hardware simply doesn't support
>     what you
>     try to do here in all cases.
>
>
> What can the hardware not do here? Is this just needing to wait for 
> TLB flushes before we can free pagetables, can we just delay that?

On some hardware generations (especially Navi1x, but also everything 
older than Polaris) you can't invalidate the TLB while it is in use.

For Polaris and older it just means that you don't have a guarantee that 
the shader can't access the memory any more. So delaying the free 
operation helps here.

But for Navi1x it's a workaround for a hardware bug. If you try to 
invalidate the TLB while it is in use, you can potentially trigger 
memory accesses to random addresses.

That's why we still delay TLB invalidations to the next CS and use a 
new VMID for each submission instead of invalidating the old one.

I'm currently working on changing that for Navi2x and newer (maybe Vega 
as well), but this is something you can really only do on some hw 
generations after validating that it works.

Regards,
Christian.

>
>
>     So this approach won't work in general.
>
>     Regards,
>     Christian.
>
>     >
>     > Introduce a new state "dirty", shared between per-VM BOs and
>     traditional
>     > BOs, containing all BOs that have pending updates in `invalids`.
>     > amdgpu_gem_va_update_vm will now simply flush any pending
>     updates for BOs
>     > in the dirty state.
>     >
>     > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
>     > ---
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66
>     ++++++++++++++++++-------
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>     >   3 files changed, 63 insertions(+), 24 deletions(-)
>     >
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>     > index a1b15d0d6c48..01d3a97248b0 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>     > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct
>     drm_device *dev, void *data,
>     >    * vital here, so they are not reported back to userspace.
>     >    */
>     >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>     > -                                 struct amdgpu_vm *vm,
>     > -                                 struct amdgpu_bo_va *bo_va,
>     > -                                 uint32_t operation)
>     > +                                 struct amdgpu_vm *vm)
>     >   {
>     > +     struct amdgpu_bo_va *bo_va;
>     >       int r;
>     >
>     >       if (!amdgpu_vm_ready(vm))
>     > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
>     amdgpu_device *adev,
>     >       if (r)
>     >               goto error;
>     >
>     > -     if (operation == AMDGPU_VA_OP_MAP ||
>     > -         operation == AMDGPU_VA_OP_REPLACE) {
>     > +     spin_lock(&vm->status_lock);
>     > +     while (!list_empty(&vm->dirty)) {
>     > +             bo_va = list_first_entry(&vm->dirty, struct
>     amdgpu_bo_va,
>     > +                                      base.vm_status);
>     > +             spin_unlock(&vm->status_lock);
>     > +
>     >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>     >               if (r)
>     >                       goto error;
>     > +             spin_lock(&vm->status_lock);
>     >       }
>     > +     spin_unlock(&vm->status_lock);
>     >
>     >       r = amdgpu_vm_update_pdes(adev, vm, false);
>     >
>     > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device
>     *dev, void *data,
>     >               break;
>     >       }
>     >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
>     !amdgpu_vm_debug)
>     > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>     > -  args->operation);
>     > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>     >
>     >   error:
>     >       drm_exec_fini(&exec);
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>     > index dd6f72e2a1d6..01d31891cd05 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>     > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
>     amdgpu_vm_bo_base *vm_bo, bool evict
>     >       spin_unlock(&vm_bo->vm->status_lock);
>     >   }
>     >
>     > +/**
>     > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>     > + *
>     > + * @vm_bo: vm_bo which is dirty
>     > + *
>     > + * State for normal and per VM BOs that are not moved, but have
>     new entries in
>     > + * bo_va->invalids.
>     > + */
>     > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>     > +{
>     > +     spin_lock(&vm_bo->vm->status_lock);
>     > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>     > +     spin_unlock(&vm_bo->vm->status_lock);
>     > +}
>     > +
>     >   /**
>     >    * amdgpu_vm_bo_moved - vm_bo is moved
>     >    *
>     > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm
>     *vm,
>     >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
>     base.eviction_status)
>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>     >
>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>     base.vm_status)
>     > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>     > +
>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>     base.vm_status)
>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>     >
>     > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct
>     amdgpu_device *adev,
>     >                       dma_resv_unlock(resv);
>     >               spin_lock(&vm->status_lock);
>     >       }
>     > +
>     > +     while (!list_empty(&vm->dirty)) {
>     > +             bo_va = list_first_entry(&vm->dirty, struct
>     amdgpu_bo_va,
>     > +                                      base.vm_status);
>     > +             spin_unlock(&vm->status_lock);
>     > +
>     > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>     > +             if (r)
>     > +                     return r;
>     > +             spin_lock(&vm->status_lock);
>     > +     }
>     >       spin_unlock(&vm->status_lock);
>     >
>     >       return 0;
>     > @@ -1476,19 +1505,16 @@ static void
>     amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>     >                                   struct amdgpu_bo_va_mapping
>     *mapping)
>     >   {
>     >       struct amdgpu_vm *vm = bo_va->base.vm;
>     > -     struct amdgpu_bo *bo = bo_va->base.bo <http://base.bo>;
>     >
>     >       mapping->bo_va = bo_va;
>     >       list_add(&mapping->list, &bo_va->invalids);
>     >       amdgpu_vm_it_insert(mapping, &vm->va);
>     > +     if (!bo_va->base.moved)
>     > +             amdgpu_vm_bo_dirty(&bo_va->base);
>     >
>     >       if (mapping->flags & AMDGPU_PTE_PRT)
>     >               amdgpu_vm_prt_get(adev);
>     >
>     > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>     > -         !bo_va->base.moved) {
>     > -             amdgpu_vm_bo_moved(&bo_va->base);
>     > -     }
>     >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>     >   }
>     >
>     > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>     amdgpu_device *adev,
>     >                       before->flags = tmp->flags;
>     >                       before->bo_va = tmp->bo_va;
>     >                       list_add(&before->list,
>     &tmp->bo_va->invalids);
>     > +                     if (!tmp->bo_va->base.moved)
>     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>     >               }
>     >
>     >               /* Remember mapping split at the end */
>     > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>     amdgpu_device *adev,
>     >                       after->flags = tmp->flags;
>     >                       after->bo_va = tmp->bo_va;
>     >                       list_add(&after->list, &tmp->bo_va->invalids);
>     > +                     if (!tmp->bo_va->base.moved)
>     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>     >               }
>     >
>     >               list_del(&tmp->list);
>     > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
>     amdgpu_device *adev,
>     >
>     >       /* Insert partial mapping before the range */
>     >       if (!list_empty(&before->list)) {
>     > -             struct amdgpu_bo *bo = before->bo_va->base.bo
>     <http://base.bo>;
>     > -
>     >               amdgpu_vm_it_insert(before, &vm->va);
>     >               if (before->flags & AMDGPU_PTE_PRT)
>     >                       amdgpu_vm_prt_get(adev);
>     > -
>     > -             if (bo && bo->tbo.base.resv ==
>     vm->root.bo->tbo.base.resv &&
>     > -                 !before->bo_va->base.moved)
>     > -  amdgpu_vm_bo_moved(&before->bo_va->base);
>     >       } else {
>     >               kfree(before);
>     >       }
>     >
>     >       /* Insert partial mapping after the range */
>     >       if (!list_empty(&after->list)) {
>     > -             struct amdgpu_bo *bo = after->bo_va->base.bo;
>     > -
>     >               amdgpu_vm_it_insert(after, &vm->va);
>     >               if (after->flags & AMDGPU_PTE_PRT)
>     >                       amdgpu_vm_prt_get(adev);
>     > -
>     > -             if (bo && bo->tbo.base.resv ==
>     vm->root.bo->tbo.base.resv &&
>     > -                 !after->bo_va->base.moved)
>     > -  amdgpu_vm_bo_moved(&after->bo_va->base);
>     >       } else {
>     >               kfree(after);
>     >       }
>     > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device
>     *adev, struct amdgpu_vm *vm, int32_t xcp
>     >       INIT_LIST_HEAD(&vm->evicted);
>     >       INIT_LIST_HEAD(&vm->relocated);
>     >       INIT_LIST_HEAD(&vm->moved);
>     > +     INIT_LIST_HEAD(&vm->dirty);
>     >       INIT_LIST_HEAD(&vm->idle);
>     >       INIT_LIST_HEAD(&vm->invalidated);
>     >       spin_lock_init(&vm->status_lock);
>     > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct
>     amdgpu_vm *vm, struct seq_file *m)
>     >   {
>     >       struct amdgpu_bo_va *bo_va, *tmp;
>     >       u64 total_idle = 0;
>     > +     u64 total_dirty = 0;
>     >       u64 total_relocated = 0;
>     >       u64 total_moved = 0;
>     >       u64 total_invalidated = 0;
>     >       u64 total_done = 0;
>     >       unsigned int total_idle_objs = 0;
>     > +     unsigned int total_dirty_objs = 0;
>     >       unsigned int total_relocated_objs = 0;
>     >       unsigned int total_moved_objs = 0;
>     >       unsigned int total_invalidated_objs = 0;
>     > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct
>     amdgpu_vm *vm, struct seq_file *m)
>     >       total_idle_objs = id;
>     >       id = 0;
>     >
>     > +     seq_puts(m, "\tDirty BOs:\n");
>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>     base.vm_status) {
>     > +             if (!bo_va->base.bo)
>     > +                     continue;
>     > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
>     > +     }
>     > +     total_dirty_objs = id;
>     > +     id = 0;
>     > +
>     >       seq_puts(m, "\tRelocated BOs:\n");
>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>     base.vm_status) {
>     >               if (!bo_va->base.bo)
>     > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct
>     amdgpu_vm *vm, struct seq_file *m)
>     >
>     >       seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n",
>     total_idle,
>     >                  total_idle_objs);
>     > +     seq_printf(m, "\tTotal dirty size:  %12lld\tobjs:\t%d\n",
>     total_dirty,
>     > +                total_dirty_objs);
>     >       seq_printf(m, "\tTotal relocated size:
>      %12lld\tobjs:\t%d\n", total_relocated,
>     >                  total_relocated_objs);
>     >       seq_printf(m, "\tTotal moved size:  %12lld\tobjs:\t%d\n",
>     total_moved,
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>     > index d9ab97eabda9..f91d4fcf80b8 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>     > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>     >       /* per VM BOs moved, but not yet updated in the PT */
>     >       struct list_head        moved;
>     >
>     > +     /* normal and per VM BOs that are not moved, but have new
>     PT entries */
>     > +     struct list_head        dirty;
>     > +
>     >       /* All BOs of this VM not currently in the state machine */
>     >       struct list_head        idle;
>     >
>
Bas Nieuwenhuizen Oct. 31, 2023, 2:17 p.m. UTC | #4
On Tue, Oct 31, 2023 at 3:08 PM Christian König <christian.koenig@amd.com>
wrote:

> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>
>
>
> On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koenig@amd.com>
> wrote:
>
>> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>> > The current amdgpu_gem_va_update_vm only tries to perform updates for
>> the
>> > BO specified in the GEM ioctl; however, when a binding is split, the
>> > adjacent bindings also need to be updated. Such updates currently ends
>> up
>> > getting deferred until next submission which causes stalls.
>>
>> Yeah, that is a necessity. The hardware simply doesn't support what you
>> try to do here in all cases.
>>
>
> What can the hardware not do here? Is this just needing to wait for TLB
> flushes before we can free pagetables, can we just delay that?
>
>
> On some hardware generations (especially Navi1x, but also everything older
> than Polaris) you can't invalidate the TLB while it is in use.
>
> For Polaris and older it just means that you don't have a guarantee that
> the shader can't access the memory any more. So delaying the free operation
> helps here.
>
> But for Navi1x it's a workaround for a hardware bug. If you try to
> invalidate the TLB while it is in use you can potentially triggering memory
> accesses to random addresses.
>
> That's why we still delay TLB invalidation's to the next CS and use a new
> VMID for each submission instead of invalidating the old one.
>
> I'm currently working on changing that for Navi2x and newer (maybe Vega as
> well), but this is something you can really only do on some hw generations
> after validating that it works.
>

I think as long as we make sure all significant work gets done
asynchronously, doing the TLB flushing on the next submit (/submissions,
one per queue?) is fine for our purposes.

(As an aside after thinking some more I *think* we also need some work to
make these maps/unmaps (VALID->PRT and PRT->VALID) atomic, as I think it is
valid Vulkan to make these race. As such I'm speculating we'd need a bit
more reworking there too, not just a late free of the lower level
pagetables)

- Bas

>
> Regards,
> Christian.
>
>
>
>>
>> So this approach won't work in general.
>>
>> Regards,
>> Christian.
>>
>> >
>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>> > BOs, containing all BOs that have pending updates in `invalids`.
>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for
>> BOs
>> > in the dirty state.
>> >
>> > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>> >   3 files changed, 63 insertions(+), 24 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > index a1b15d0d6c48..01d3a97248b0 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device
>> *dev, void *data,
>> >    * vital here, so they are not reported back to userspace.
>> >    */
>> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>> > -                                 struct amdgpu_vm *vm,
>> > -                                 struct amdgpu_bo_va *bo_va,
>> > -                                 uint32_t operation)
>> > +                                 struct amdgpu_vm *vm)
>> >   {
>> > +     struct amdgpu_bo_va *bo_va;
>> >       int r;
>> >
>> >       if (!amdgpu_vm_ready(vm))
>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
>> amdgpu_device *adev,
>> >       if (r)
>> >               goto error;
>> >
>> > -     if (operation == AMDGPU_VA_OP_MAP ||
>> > -         operation == AMDGPU_VA_OP_REPLACE) {
>> > +     spin_lock(&vm->status_lock);
>> > +     while (!list_empty(&vm->dirty)) {
>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +                                      base.vm_status);
>> > +             spin_unlock(&vm->status_lock);
>> > +
>> >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>> >               if (r)
>> >                       goto error;
>> > +             spin_lock(&vm->status_lock);
>> >       }
>> > +     spin_unlock(&vm->status_lock);
>> >
>> >       r = amdgpu_vm_update_pdes(adev, vm, false);
>> >
>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>> >               break;
>> >       }
>> >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
>> !amdgpu_vm_debug)
>> > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>> > -                                     args->operation);
>> > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>> >
>> >   error:
>> >       drm_exec_fini(&exec);
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > index dd6f72e2a1d6..01d31891cd05 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
>> amdgpu_vm_bo_base *vm_bo, bool evict
>> >       spin_unlock(&vm_bo->vm->status_lock);
>> >   }
>> >
>> > +/**
>> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>> > + *
>> > + * @vm_bo: vm_bo which is dirty
>> > + *
>> > + * State for normal and per VM BOs that are not moved, but have new
>> entries in
>> > + * bo_va->invalids.
>> > + */
>> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>> > +{
>> > +     spin_lock(&vm_bo->vm->status_lock);
>> > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>> > +     spin_unlock(&vm_bo->vm->status_lock);
>> > +}
>> > +
>> >   /**
>> >    * amdgpu_vm_bo_moved - vm_bo is moved
>> >    *
>> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
>> base.eviction_status)
>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>> >
>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>> > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>> > +
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>> base.vm_status)
>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>> >
>> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device
>> *adev,
>> >                       dma_resv_unlock(resv);
>> >               spin_lock(&vm->status_lock);
>> >       }
>> > +
>> > +     while (!list_empty(&vm->dirty)) {
>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +                                      base.vm_status);
>> > +             spin_unlock(&vm->status_lock);
>> > +
>> > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>> > +             if (r)
>> > +                     return r;
>> > +             spin_lock(&vm->status_lock);
>> > +     }
>> >       spin_unlock(&vm->status_lock);
>> >
>> >       return 0;
>> > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct
>> amdgpu_device *adev,
>> >                                   struct amdgpu_bo_va_mapping *mapping)
>> >   {
>> >       struct amdgpu_vm *vm = bo_va->base.vm;
>> > -     struct amdgpu_bo *bo = bo_va->base.bo;
>> >
>> >       mapping->bo_va = bo_va;
>> >       list_add(&mapping->list, &bo_va->invalids);
>> >       amdgpu_vm_it_insert(mapping, &vm->va);
>> > +     if (!bo_va->base.moved)
>> > +             amdgpu_vm_bo_dirty(&bo_va->base);
>> >
>> >       if (mapping->flags & AMDGPU_PTE_PRT)
>> >               amdgpu_vm_prt_get(adev);
>> >
>> > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>> > -         !bo_va->base.moved) {
>> > -             amdgpu_vm_bo_moved(&bo_va->base);
>> > -     }
>> >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>> >   }
>> >
>> > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>> amdgpu_device *adev,
>> >                       before->flags = tmp->flags;
>> >                       before->bo_va = tmp->bo_va;
>> >                       list_add(&before->list, &tmp->bo_va->invalids);
>> > +                     if (!tmp->bo_va->base.moved)
>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>> >               }
>> >
>> >               /* Remember mapping split at the end */
>> > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>> amdgpu_device *adev,
>> >                       after->flags = tmp->flags;
>> >                       after->bo_va = tmp->bo_va;
>> >                       list_add(&after->list, &tmp->bo_va->invalids);
>> > +                     if (!tmp->bo_va->base.moved)
>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>> >               }
>> >
>> >               list_del(&tmp->list);
>> > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
>> amdgpu_device *adev,
>> >
>> >       /* Insert partial mapping before the range */
>> >       if (!list_empty(&before->list)) {
>> > -             struct amdgpu_bo *bo = before->bo_va->base.bo;
>> > -
>> >               amdgpu_vm_it_insert(before, &vm->va);
>> >               if (before->flags & AMDGPU_PTE_PRT)
>> >                       amdgpu_vm_prt_get(adev);
>> > -
>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv
>> &&
>> > -                 !before->bo_va->base.moved)
>> > -                     amdgpu_vm_bo_moved(&before->bo_va->base);
>> >       } else {
>> >               kfree(before);
>> >       }
>> >
>> >       /* Insert partial mapping after the range */
>> >       if (!list_empty(&after->list)) {
>> > -             struct amdgpu_bo *bo = after->bo_va->base.bo;
>> > -
>> >               amdgpu_vm_it_insert(after, &vm->va);
>> >               if (after->flags & AMDGPU_PTE_PRT)
>> >                       amdgpu_vm_prt_get(adev);
>> > -
>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv
>> &&
>> > -                 !after->bo_va->base.moved)
>> > -                     amdgpu_vm_bo_moved(&after->bo_va->base);
>> >       } else {
>> >               kfree(after);
>> >       }
>> > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm, int32_t xcp
>> >       INIT_LIST_HEAD(&vm->evicted);
>> >       INIT_LIST_HEAD(&vm->relocated);
>> >       INIT_LIST_HEAD(&vm->moved);
>> > +     INIT_LIST_HEAD(&vm->dirty);
>> >       INIT_LIST_HEAD(&vm->idle);
>> >       INIT_LIST_HEAD(&vm->invalidated);
>> >       spin_lock_init(&vm->status_lock);
>> > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
>> *vm, struct seq_file *m)
>> >   {
>> >       struct amdgpu_bo_va *bo_va, *tmp;
>> >       u64 total_idle = 0;
>> > +     u64 total_dirty = 0;
>> >       u64 total_relocated = 0;
>> >       u64 total_moved = 0;
>> >       u64 total_invalidated = 0;
>> >       u64 total_done = 0;
>> >       unsigned int total_idle_objs = 0;
>> > +     unsigned int total_dirty_objs = 0;
>> >       unsigned int total_relocated_objs = 0;
>> >       unsigned int total_moved_objs = 0;
>> >       unsigned int total_invalidated_objs = 0;
>> > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
>> *vm, struct seq_file *m)
>> >       total_idle_objs = id;
>> >       id = 0;
>> >
>> > +     seq_puts(m, "\tDirty BOs:\n");
>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>> > +             if (!bo_va->base.bo)
>> > +                     continue;
>> > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo,
>> m);
>> > +     }
>> > +     total_dirty_objs = id;
>> > +     id = 0;
>> > +
>> >       seq_puts(m, "\tRelocated BOs:\n");
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>> base.vm_status) {
>> >               if (!bo_va->base.bo)
>> > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm
>> *vm, struct seq_file *m)
>> >
>> >       seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n",
>> total_idle,
>> >                  total_idle_objs);
>> > +     seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n",
>> total_dirty,
>> > +                total_dirty_objs);
>> >       seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n",
>> total_relocated,
>> >                  total_relocated_objs);
>> >       seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n",
>> total_moved,
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > index d9ab97eabda9..f91d4fcf80b8 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>> >       /* per VM BOs moved, but not yet updated in the PT */
>> >       struct list_head        moved;
>> >
>> > +     /* normal and per VM BOs that are not moved, but have new PT
>> entries */
>> > +     struct list_head        dirty;
>> > +
>> >       /* All BOs of this VM not currently in the state machine */
>> >       struct list_head        idle;
>> >
>>
>>
>
Tatsuyuki Ishi Oct. 31, 2023, 2:39 p.m. UTC | #5
> On Oct 31, 2023, at 23:17, Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> wrote:
> 
> 
> 
> On Tue, Oct 31, 2023 at 3:08 PM Christian König <christian.koenig@amd.com> wrote:
> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>> 
>> 
>> On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koenig@amd.com> wrote:
>> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
>> > BO specified in the GEM ioctl; however, when a binding is split, the
>> > adjacent bindings also need to be updated. Such updates currently ends up
>> > getting deferred until next submission which causes stalls.
>> 
>> Yeah, that is a necessity. The hardware simply doesn't support what you 
>> try to do here in all cases.
>> 
>> What can the hardware not do here? Is this just needing to wait for TLB flushes before we can free pagetables, can we just delay that?
> 
> On some hardware generations (especially Navi1x, but also everything older than Polaris) you can't invalidate the TLB while it is in use.
> 
> For Polaris and older it just means that you don't have a guarantee that the shader can't access the memory any more. So delaying the free operation helps here.
> 
> But for Navi1x it's a workaround for a hardware bug. If you try to invalidate the TLB while it is in use you can potentially triggering memory accesses to random addresses.
> 
> That's why we still delay TLB invalidation's to the next CS and use a new VMID for each submission instead of invalidating the old one.
> 
> I'm currently working on changing that for Navi2x and newer (maybe Vega as well), but this is something you can really only do on some hw generations after validating that it works.
> 
> I think as long as we make sure all significant work gets done asynchronously, doing the TLB flushing on the next submit (/submissions, one per queue?) is fine for our purposes.

For a bit more context, the performance / frame timing in Forza with just patch 5 wasn’t quite right. As Bas said, ideally we want to perform all the PT updates right away, and only defer the TLB flush.

For now the state machine part of this patch doesn’t seem to be going in the right direction so I’ll consider dropping this change.

Tatsuyuki.

> 
> (As an aside after thinking some more I *think* we also need some work to make these maps/unmaps (VALID->PRT and PRT->VALID) atomic, as I think it is valid Vulkan to make these race. As such I'm speculating we'd need a bit more reworking there too, not just a late free of the lower level pagetables)
> 
> - Bas 
> 
> Regards,
> Christian. 
> 
>>  
>> 
>> So this approach won't work in general.
>> 
>> Regards,
>> Christian.
>> 
>> >
>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>> > BOs, containing all BOs that have pending updates in `invalids`.
>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
>> > in the dirty state.
>> >
>> > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>> >   3 files changed, 63 insertions(+), 24 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > index a1b15d0d6c48..01d3a97248b0 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>> >    * vital here, so they are not reported back to userspace.
>> >    */
>> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>> > -                                 struct amdgpu_vm *vm,
>> > -                                 struct amdgpu_bo_va *bo_va,
>> > -                                 uint32_t operation)
>> > +                                 struct amdgpu_vm *vm)
>> >   {
>> > +     struct amdgpu_bo_va *bo_va;
>> >       int r;
>> >   
>> >       if (!amdgpu_vm_ready(vm))
>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>> >       if (r)
>> >               goto error;
>> >   
>> > -     if (operation == AMDGPU_VA_OP_MAP ||
>> > -         operation == AMDGPU_VA_OP_REPLACE) {
>> > +     spin_lock(&vm->status_lock);
>> > +     while (!list_empty(&vm->dirty)) {
>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +                                      base.vm_status);
>> > +             spin_unlock(&vm->status_lock);
>> > +
>> >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>> >               if (r)
>> >                       goto error;
>> > +             spin_lock(&vm->status_lock);
>> >       }
>> > +     spin_unlock(&vm->status_lock);
>> >   
>> >       r = amdgpu_vm_update_pdes(adev, vm, false);
>> >   
>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>> >               break;
>> >       }
>> >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>> > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>> > -                                     args->operation);
>> > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>> >   
>> >   error:
>> >       drm_exec_fini(&exec);
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > index dd6f72e2a1d6..01d31891cd05 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>> >       spin_unlock(&vm_bo->vm->status_lock);
>> >   }
>> >   
>> > +/**
>> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>> > + *
>> > + * @vm_bo: vm_bo which is dirty
>> > + *
>> > + * State for normal and per VM BOs that are not moved, but have new entries in
>> > + * bo_va->invalids.
>> > + */
>> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>> > +{
>> > +     spin_lock(&vm_bo->vm->status_lock);
>> > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>> > +     spin_unlock(&vm_bo->vm->status_lock);
>> > +}
>> > +
>> >   /**
>> >    * amdgpu_vm_bo_moved - vm_bo is moved
>> >    *
>> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>> >   
>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>> > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>> > +
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>> >   
>> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> >                       dma_resv_unlock(resv);
>> >               spin_lock(&vm->status_lock);
>> >       }
>> > +
>> > +     while (!list_empty(&vm->dirty)) {
>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>> > +                                      base.vm_status);
>> > +             spin_unlock(&vm->status_lock);
>> > +
>> > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>> > +             if (r)
>> > +                     return r;
>> > +             spin_lock(&vm->status_lock);
>> > +     }
>> >       spin_unlock(&vm->status_lock);
>> >   
>> >       return 0;
>> > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>> >                                   struct amdgpu_bo_va_mapping *mapping)
>> >   {
>> >       struct amdgpu_vm *vm = bo_va->base.vm;
>> > -     struct amdgpu_bo *bo = bo_va->base.bo;
>> >   
>> >       mapping->bo_va = bo_va;
>> >       list_add(&mapping->list, &bo_va->invalids);
>> >       amdgpu_vm_it_insert(mapping, &vm->va);
>> > +     if (!bo_va->base.moved)
>> > +             amdgpu_vm_bo_dirty(&bo_va->base);
>> >   
>> >       if (mapping->flags & AMDGPU_PTE_PRT)
>> >               amdgpu_vm_prt_get(adev);
>> >   
>> > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>> > -         !bo_va->base.moved) {
>> > -             amdgpu_vm_bo_moved(&bo_va->base);
>> > -     }
>> >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>> >   }
>> >   
>> > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>> >                       before->flags = tmp->flags;
>> >                       before->bo_va = tmp->bo_va;
>> >                       list_add(&before->list, &tmp->bo_va->invalids);
>> > +                     if (!tmp->bo_va->base.moved)
>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>> >               }
>> >   
>> >               /* Remember mapping split at the end */
>> > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>> >                       after->flags = tmp->flags;
>> >                       after->bo_va = tmp->bo_va;
>> >                       list_add(&after->list, &tmp->bo_va->invalids);
>> > +                     if (!tmp->bo_va->base.moved)
>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>> >               }
>> >   
>> >               list_del(&tmp->list);
>> > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>> >   
>> >       /* Insert partial mapping before the range */
>> >       if (!list_empty(&before->list)) {
>> > -             struct amdgpu_bo *bo = before->bo_va->base.bo;
>> > -
>> >               amdgpu_vm_it_insert(before, &vm->va);
>> >               if (before->flags & AMDGPU_PTE_PRT)
>> >                       amdgpu_vm_prt_get(adev);
>> > -
>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>> > -                 !before->bo_va->base.moved)
>> > -                     amdgpu_vm_bo_moved(&before->bo_va->base);
>> >       } else {
>> >               kfree(before);
>> >       }
>> >   
>> >       /* Insert partial mapping after the range */
>> >       if (!list_empty(&after->list)) {
>> > -             struct amdgpu_bo *bo = after->bo_va->base.bo;
>> > -
>> >               amdgpu_vm_it_insert(after, &vm->va);
>> >               if (after->flags & AMDGPU_PTE_PRT)
>> >                       amdgpu_vm_prt_get(adev);
>> > -
>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>> > -                 !after->bo_va->base.moved)
>> > -                     amdgpu_vm_bo_moved(&after->bo_va->base);
>> >       } else {
>> >               kfree(after);
>> >       }
>> > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>> >       INIT_LIST_HEAD(&vm->evicted);
>> >       INIT_LIST_HEAD(&vm->relocated);
>> >       INIT_LIST_HEAD(&vm->moved);
>> > +     INIT_LIST_HEAD(&vm->dirty);
>> >       INIT_LIST_HEAD(&vm->idle);
>> >       INIT_LIST_HEAD(&vm->invalidated);
>> >       spin_lock_init(&vm->status_lock);
>> > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>> >   {
>> >       struct amdgpu_bo_va *bo_va, *tmp;
>> >       u64 total_idle = 0;
>> > +     u64 total_dirty = 0;
>> >       u64 total_relocated = 0;
>> >       u64 total_moved = 0;
>> >       u64 total_invalidated = 0;
>> >       u64 total_done = 0;
>> >       unsigned int total_idle_objs = 0;
>> > +     unsigned int total_dirty_objs = 0;
>> >       unsigned int total_relocated_objs = 0;
>> >       unsigned int total_moved_objs = 0;
>> >       unsigned int total_invalidated_objs = 0;
>> > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>> >       total_idle_objs = id;
>> >       id = 0;
>> >   
>> > +     seq_puts(m, "\tDirty BOs:\n");
>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>> > +             if (!bo_va->base.bo)
>> > +                     continue;
>> > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
>> > +     }
>> > +     total_dirty_objs = id;
>> > +     id = 0;
>> > +
>> >       seq_puts(m, "\tRelocated BOs:\n");
>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>> >               if (!bo_va->base.bo)
>> > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>> >   
>> >       seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>> >                  total_idle_objs);
>> > +     seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
>> > +                total_dirty_objs);
>> >       seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>> >                  total_relocated_objs);
>> >       seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > index d9ab97eabda9..f91d4fcf80b8 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>> >       /* per VM BOs moved, but not yet updated in the PT */
>> >       struct list_head        moved;
>> >   
>> > +     /* normal and per VM BOs that are not moved, but have new PT entries */
>> > +     struct list_head        dirty;
>> > +
>> >       /* All BOs of this VM not currently in the state machine */
>> >       struct list_head        idle;
>> >
kernel test robot Nov. 1, 2023, 1:18 a.m. UTC | #6
Hi Tatsuyuki,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-exynos/exynos-drm-next drm-intel/for-linux-next-fixes linus/master v6.6]
[cannot apply to drm/drm-next drm-intel/for-linux-next drm-tip/drm-tip next-20231031]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Tatsuyuki-Ishi/drm-amdgpu-Don-t-implicit-sync-PRT-maps/20231031-224530
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20231031134059.171277-4-ishitatsuyuki%40gmail.com
patch subject: [PATCH 3/6] drm/amdgpu: Flush VM updates for split bindings eagerly.
config: arc-randconfig-001-20231101 (https://download.01.org/0day-ci/archive/20231101/202311010948.G6I55pTu-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231101/202311010948.G6I55pTu-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311010948.G6I55pTu-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:608: warning: Excess function parameter 'bo_va' description in 'amdgpu_gem_va_update_vm'
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:608: warning: Excess function parameter 'operation' description in 'amdgpu_gem_va_update_vm'


vim +608 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

d38ceaf99ed015f Alex Deucher        2015-04-20  594  
d38ceaf99ed015f Alex Deucher        2015-04-20  595  /**
d38ceaf99ed015f Alex Deucher        2015-04-20  596   * amdgpu_gem_va_update_vm -update the bo_va in its VM
d38ceaf99ed015f Alex Deucher        2015-04-20  597   *
d38ceaf99ed015f Alex Deucher        2015-04-20  598   * @adev: amdgpu_device pointer
dc54d3d1744d23e Christian König     2017-03-13  599   * @vm: vm to update
d38ceaf99ed015f Alex Deucher        2015-04-20  600   * @bo_va: bo_va to update
dc54d3d1744d23e Christian König     2017-03-13  601   * @operation: map, unmap or clear
d38ceaf99ed015f Alex Deucher        2015-04-20  602   *
2ffdaafb5d5f37b Christian König     2017-01-27  603   * Update the bo_va directly after setting its address. Errors are not
d38ceaf99ed015f Alex Deucher        2015-04-20  604   * vital here, so they are not reported back to userspace.
d38ceaf99ed015f Alex Deucher        2015-04-20  605   */
d38ceaf99ed015f Alex Deucher        2015-04-20  606  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  607  				    struct amdgpu_vm *vm)
d38ceaf99ed015f Alex Deucher        2015-04-20 @608  {
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  609  	struct amdgpu_bo_va *bo_va;
3f3333f8a0e90ac Christian König     2017-08-03  610  	int r;
d38ceaf99ed015f Alex Deucher        2015-04-20  611  
3f3333f8a0e90ac Christian König     2017-08-03  612  	if (!amdgpu_vm_ready(vm))
3f3333f8a0e90ac Christian König     2017-08-03  613  		return;
e410b5cbabe70b1 Chunming Zhou       2015-12-07  614  
f34678187a33970 Nicolai Hähnle      2017-03-23  615  	r = amdgpu_vm_clear_freed(adev, vm, NULL);
d38ceaf99ed015f Alex Deucher        2015-04-20  616  	if (r)
2ffdaafb5d5f37b Christian König     2017-01-27  617  		goto error;
194a33643b1161f monk.liu            2015-07-22  618  
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  619  	spin_lock(&vm->status_lock);
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  620  	while (!list_empty(&vm->dirty)) {
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  621  		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  622  					 base.vm_status);
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  623  		spin_unlock(&vm->status_lock);
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  624  
8f8cc3fb43508a2 Christian König     2022-03-17  625  		r = amdgpu_vm_bo_update(adev, bo_va, false);
0abc6878fc2d699 Christian König     2017-09-01  626  		if (r)
0abc6878fc2d699 Christian König     2017-09-01  627  			goto error;
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  628  		spin_lock(&vm->status_lock);
93bab704c1513f8 Gustavo A. R. Silva 2018-02-14  629  	}
ddf1ffe56ab385a Tatsuyuki Ishi      2023-10-31  630  	spin_unlock(&vm->status_lock);
93bab704c1513f8 Gustavo A. R. Silva 2018-02-14  631  
807e2994092c0bd Christian König     2019-03-14  632  	r = amdgpu_vm_update_pdes(adev, vm, false);
0abc6878fc2d699 Christian König     2017-09-01  633  
2ffdaafb5d5f37b Christian König     2017-01-27  634  error:
68fdd3df79ee4bf Christian König     2015-06-16  635  	if (r && r != -ERESTARTSYS)
d38ceaf99ed015f Alex Deucher        2015-04-20  636  		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
d38ceaf99ed015f Alex Deucher        2015-04-20  637  }
d38ceaf99ed015f Alex Deucher        2015-04-20  638
Lang Yu Nov. 2, 2023, 2:36 a.m. UTC | #7
On 10/31/ , Christian König wrote:
> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
> > 
> > 
> > On Tue, Oct 31, 2023 at 2:57 PM Christian König
> > <christian.koenig@amd.com> wrote:
> > 
> >     Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
> >     > The current amdgpu_gem_va_update_vm only tries to perform
> >     updates for the
> >     > BO specified in the GEM ioctl; however, when a binding is split, the
> >     > adjacent bindings also need to be updated. Such updates
> >     currently ends up
> >     > getting deferred until next submission which causes stalls.
> > 
> >     Yeah, that is a necessity. The hardware simply doesn't support
> >     what you
> >     try to do here in all cases.
> > 
> > 
> > What can the hardware not do here? Is this just needing to wait for TLB
> > flushes before we can free pagetables, can we just delay that?
> 
> On some hardware generations (especially Navi1x, but also everything older
> than Polaris) you can't invalidate the TLB while it is in use.

Hi Christian,

non-legacy invalidation can invalidate the TLB while it is in use.
Right? Thanks.

Regards,
Lang

> For Polaris and older it just means that you don't have a guarantee that the
> shader can't access the memory any more. So delaying the free operation
> helps here.
> 
> But for Navi1x it's a workaround for a hardware bug. If you try to
> invalidate the TLB while it is in use you can potentially trigger memory
> accesses to random addresses.
> 
> That's why we still delay TLB invalidations to the next CS and use a new
> VMID for each submission instead of invalidating the old one.
> 
> I'm currently working on changing that for Navi2x and newer (maybe Vega as
> well), but this is something you can really only do on some hw generations
> after validating that it works.
> 
> Regards,
> Christian.
> 
> > 
> > 
> >     So this approach won't work in general.
> > 
> >     Regards,
> >     Christian.
> > 
> >     >
> >     > Introduce a new state "dirty", shared between per-VM BOs and
> >     traditional
> >     > BOs, containing all BOs that have pending updates in `invalids`.
> >     > amdgpu_gem_va_update_vm will now simply flush any pending
> >     updates for BOs
> >     > in the dirty state.
> >     >
> >     > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
> >     > ---
> >     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
> >     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66
> >     ++++++++++++++++++-------
> >     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
> >     >   3 files changed, 63 insertions(+), 24 deletions(-)
> >     >
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >     > index a1b15d0d6c48..01d3a97248b0 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >     > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct
> >     drm_device *dev, void *data,
> >     >    * vital here, so they are not reported back to userspace.
> >     >    */
> >     >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> >     > -                                 struct amdgpu_vm *vm,
> >     > -                                 struct amdgpu_bo_va *bo_va,
> >     > -                                 uint32_t operation)
> >     > +                                 struct amdgpu_vm *vm)
> >     >   {
> >     > +     struct amdgpu_bo_va *bo_va;
> >     >       int r;
> >     >
> >     >       if (!amdgpu_vm_ready(vm))
> >     > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
> >     amdgpu_device *adev,
> >     >       if (r)
> >     >               goto error;
> >     >
> >     > -     if (operation == AMDGPU_VA_OP_MAP ||
> >     > -         operation == AMDGPU_VA_OP_REPLACE) {
> >     > +     spin_lock(&vm->status_lock);
> >     > +     while (!list_empty(&vm->dirty)) {
> >     > +             bo_va = list_first_entry(&vm->dirty, struct
> >     amdgpu_bo_va,
> >     > +                                      base.vm_status);
> >     > +             spin_unlock(&vm->status_lock);
> >     > +
> >     >               r = amdgpu_vm_bo_update(adev, bo_va, false);
> >     >               if (r)
> >     >                       goto error;
> >     > +             spin_lock(&vm->status_lock);
> >     >       }
> >     > +     spin_unlock(&vm->status_lock);
> >     >
> >     >       r = amdgpu_vm_update_pdes(adev, vm, false);
> >     >
> >     > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device
> >     *dev, void *data,
> >     >               break;
> >     >       }
> >     >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
> >     !amdgpu_vm_debug)
> >     > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> >     > -  args->operation);
> >     > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
> >     >
> >     >   error:
> >     >       drm_exec_fini(&exec);
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >     > index dd6f72e2a1d6..01d31891cd05 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >     > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
> >     amdgpu_vm_bo_base *vm_bo, bool evict
> >     >       spin_unlock(&vm_bo->vm->status_lock);
> >     >   }
> >     >
> >     > +/**
> >     > + * amdgpu_vm_bo_dirty - vm_bo is dirty
> >     > + *
> >     > + * @vm_bo: vm_bo which is dirty
> >     > + *
> >     > + * State for normal and per VM BOs that are not moved, but have
> >     new entries in
> >     > + * bo_va->invalids.
> >     > + */
> >     > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
> >     > +{
> >     > +     spin_lock(&vm_bo->vm->status_lock);
> >     > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
> >     > +     spin_unlock(&vm_bo->vm->status_lock);
> >     > +}
> >     > +
> >     >   /**
> >     >    * amdgpu_vm_bo_moved - vm_bo is moved
> >     >    *
> >     > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm
> >     *vm,
> >     >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
> >     base.eviction_status)
> >     >               amdgpu_vm_bo_get_memory(bo_va, stats);
> >     >
> >     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
> >     base.vm_status)
> >     > +             amdgpu_vm_bo_get_memory(bo_va, stats);
> >     > +
> >     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
> >     base.vm_status)
> >     >               amdgpu_vm_bo_get_memory(bo_va, stats);
> >     >
> >     > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct
> >     amdgpu_device *adev,
> >     >                       dma_resv_unlock(resv);
> >     >               spin_lock(&vm->status_lock);
> >     >       }
> >     > +
> >     > +     while (!list_empty(&vm->dirty)) {
> >     > +             bo_va = list_first_entry(&vm->dirty, struct
> >     amdgpu_bo_va,
> >     > +                                      base.vm_status);
> >     > +             spin_unlock(&vm->status_lock);
> >     > +
> >     > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
> >     > +             if (r)
> >     > +                     return r;
> >     > +             spin_lock(&vm->status_lock);
> >     > +     }
> >     >       spin_unlock(&vm->status_lock);
> >     >
> >     >       return 0;
> >     > @@ -1476,19 +1505,16 @@ static void
> >     amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> >     >                                   struct amdgpu_bo_va_mapping
> >     *mapping)
> >     >   {
> >     >       struct amdgpu_vm *vm = bo_va->base.vm;
> >     > -     struct amdgpu_bo *bo = bo_va->base.bo <http://base.bo>;
> >     >
> >     >       mapping->bo_va = bo_va;
> >     >       list_add(&mapping->list, &bo_va->invalids);
> >     >       amdgpu_vm_it_insert(mapping, &vm->va);
> >     > +     if (!bo_va->base.moved)
> >     > +             amdgpu_vm_bo_dirty(&bo_va->base);
> >     >
> >     >       if (mapping->flags & AMDGPU_PTE_PRT)
> >     >               amdgpu_vm_prt_get(adev);
> >     >
> >     > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
> >     > -         !bo_va->base.moved) {
> >     > -             amdgpu_vm_bo_moved(&bo_va->base);
> >     > -     }
> >     >       trace_amdgpu_vm_bo_map(bo_va, mapping);
> >     >   }
> >     >
> >     > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
> >     amdgpu_device *adev,
> >     >                       before->flags = tmp->flags;
> >     >                       before->bo_va = tmp->bo_va;
> >     >                       list_add(&before->list,
> >     &tmp->bo_va->invalids);
> >     > +                     if (!tmp->bo_va->base.moved)
> >     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
> >     >               }
> >     >
> >     >               /* Remember mapping split at the end */
> >     > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
> >     amdgpu_device *adev,
> >     >                       after->flags = tmp->flags;
> >     >                       after->bo_va = tmp->bo_va;
> >     >                       list_add(&after->list, &tmp->bo_va->invalids);
> >     > +                     if (!tmp->bo_va->base.moved)
> >     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
> >     >               }
> >     >
> >     >               list_del(&tmp->list);
> >     > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
> >     amdgpu_device *adev,
> >     >
> >     >       /* Insert partial mapping before the range */
> >     >       if (!list_empty(&before->list)) {
> >     > -             struct amdgpu_bo *bo = before->bo_va->base.bo
> >     <http://base.bo>;
> >     > -
> >     >               amdgpu_vm_it_insert(before, &vm->va);
> >     >               if (before->flags & AMDGPU_PTE_PRT)
> >     >                       amdgpu_vm_prt_get(adev);
> >     > -
> >     > -             if (bo && bo->tbo.base.resv ==
> >     vm->root.bo->tbo.base.resv &&
> >     > -                 !before->bo_va->base.moved)
> >     > -  amdgpu_vm_bo_moved(&before->bo_va->base);
> >     >       } else {
> >     >               kfree(before);
> >     >       }
> >     >
> >     >       /* Insert partial mapping after the range */
> >     >       if (!list_empty(&after->list)) {
> >     > -             struct amdgpu_bo *bo = after->bo_va->base.bo
> >     <http://base.bo>;
> >     > -
> >     >               amdgpu_vm_it_insert(after, &vm->va);
> >     >               if (after->flags & AMDGPU_PTE_PRT)
> >     >                       amdgpu_vm_prt_get(adev);
> >     > -
> >     > -             if (bo && bo->tbo.base.resv ==
> >     vm->root.bo->tbo.base.resv &&
> >     > -                 !after->bo_va->base.moved)
> >     > -  amdgpu_vm_bo_moved(&after->bo_va->base);
> >     >       } else {
> >     >               kfree(after);
> >     >       }
> >     > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device
> >     *adev, struct amdgpu_vm *vm, int32_t xcp
> >     >       INIT_LIST_HEAD(&vm->evicted);
> >     >       INIT_LIST_HEAD(&vm->relocated);
> >     >       INIT_LIST_HEAD(&vm->moved);
> >     > +     INIT_LIST_HEAD(&vm->dirty);
> >     >       INIT_LIST_HEAD(&vm->idle);
> >     >       INIT_LIST_HEAD(&vm->invalidated);
> >     >       spin_lock_init(&vm->status_lock);
> >     > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct
> >     amdgpu_vm *vm, struct seq_file *m)
> >     >   {
> >     >       struct amdgpu_bo_va *bo_va, *tmp;
> >     >       u64 total_idle = 0;
> >     > +     u64 total_dirty = 0;
> >     >       u64 total_relocated = 0;
> >     >       u64 total_moved = 0;
> >     >       u64 total_invalidated = 0;
> >     >       u64 total_done = 0;
> >     >       unsigned int total_idle_objs = 0;
> >     > +     unsigned int total_dirty_objs = 0;
> >     >       unsigned int total_relocated_objs = 0;
> >     >       unsigned int total_moved_objs = 0;
> >     >       unsigned int total_invalidated_objs = 0;
> >     > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct
> >     amdgpu_vm *vm, struct seq_file *m)
> >     >       total_idle_objs = id;
> >     >       id = 0;
> >     >
> >     > +     seq_puts(m, "\tDirty BOs:\n");
> >     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
> >     base.vm_status) {
> >     > +             if (!bo_va->base.bo <http://base.bo>)
> >     > +                     continue;
> >     > +             total_dirty += amdgpu_bo_print_info(id++,
> >     bo_va->base.bo <http://base.bo>, m);
> >     > +     }
> >     > +     total_dirty_objs = id;
> >     > +     id = 0;
> >     > +
> >     >       seq_puts(m, "\tRelocated BOs:\n");
> >     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
> >     base.vm_status) {
> >     >               if (!bo_va->base.bo <http://base.bo>)
> >     > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct
> >     amdgpu_vm *vm, struct seq_file *m)
> >     >
> >     >       seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n",
> >     total_idle,
> >     >                  total_idle_objs);
> >     > +     seq_printf(m, "\tTotal dirty size:  %12lld\tobjs:\t%d\n",
> >     total_dirty,
> >     > +                total_dirty_objs);
> >     >       seq_printf(m, "\tTotal relocated size:
> >      %12lld\tobjs:\t%d\n", total_relocated,
> >     >                  total_relocated_objs);
> >     >       seq_printf(m, "\tTotal moved size:  %12lld\tobjs:\t%d\n",
> >     total_moved,
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >     > index d9ab97eabda9..f91d4fcf80b8 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >     > @@ -276,6 +276,9 @@ struct amdgpu_vm {
> >     >       /* per VM BOs moved, but not yet updated in the PT */
> >     >       struct list_head        moved;
> >     >
> >     > +     /* normal and per VM BOs that are not moved, but have new
> >     PT entries */
> >     > +     struct list_head        dirty;
> >     > +
> >     >       /* All BOs of this VM not currently in the state machine */
> >     >       struct list_head        idle;
> >     >
> >
Christian König Nov. 2, 2023, 6:41 a.m. UTC | #8
Am 02.11.23 um 03:36 schrieb Lang Yu:
> On 10/31/ , Christian König wrote:
>> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>>>
>>> On Tue, Oct 31, 2023 at 2:57 PM Christian König
>>> <christian.koenig@amd.com> wrote:
>>>
>>>      Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>>>      > The current amdgpu_gem_va_update_vm only tries to perform
>>>      updates for the
>>>      > BO specified in the GEM ioctl; however, when a binding is split, the
>>>      > adjacent bindings also need to be updated. Such updates
>>>      currently end up
>>>      > getting deferred until next submission which causes stalls.
>>>
>>>      Yeah, that is a necessity. The hardware simply doesn't support
>>>      what you
>>>      try to do here in all cases.
>>>
>>>
>>> What can the hardware not do here? Is this just needing to wait for TLB
>>> flushes before we can free pagetables, can we just delay that?
>> On some hardware generations (especially Navi1x, but also everything older
>> than Polaris) you can't invalidate the TLB while it is in use.
> Hi Christian,
>
> non-legacy invalidation can invalidate the TLB while it is in use.
> Right? Thanks.

Right, the problem is that they are only available starting with Vega 
(for GFX8 they only work for the APUs IIRC).

Regards,
Christian.

>
> Regards,
> Lang
>
>> For Polaris and older it just means that you don't have a guarantee that the
>> shader can't access the memory any more. So delaying the free operation
>> helps here.
>>
>> But for Navi1x it's a workaround for a hardware bug. If you try to
>> invalidate the TLB while it is in use you can potentially trigger memory
>> accesses to random addresses.
>>
>> That's why we still delay TLB invalidations to the next CS and use a new
>> VMID for each submission instead of invalidating the old one.
>>
>> I'm currently working on changing that for Navi2x and newer (maybe Vega as
>> well), but this is something you can really only do on some hw generations
>> after validating that it works.
>>
>> Regards,
>> Christian.
>>
>>>
>>>      So this approach won't work in general.
>>>
>>>      Regards,
>>>      Christian.
>>>
>>>      >
>>>      > Introduce a new state "dirty", shared between per-VM BOs and
>>>      traditional
>>>      > BOs, containing all BOs that have pending updates in `invalids`.
>>>      > amdgpu_gem_va_update_vm will now simply flush any pending
>>>      updates for BOs
>>>      > in the dirty state.
>>>      >
>>>      > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
>>>      > ---
>>>      >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>>      >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66
>>>      ++++++++++++++++++-------
>>>      >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>>>      >   3 files changed, 63 insertions(+), 24 deletions(-)
>>>      >
>>>      > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>      b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>      > index a1b15d0d6c48..01d3a97248b0 100644
>>>      > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>      > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>      > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct
>>>      drm_device *dev, void *data,
>>>      >    * vital here, so they are not reported back to userspace.
>>>      >    */
>>>      >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>      > -                                 struct amdgpu_vm *vm,
>>>      > -                                 struct amdgpu_bo_va *bo_va,
>>>      > -                                 uint32_t operation)
>>>      > +                                 struct amdgpu_vm *vm)
>>>      >   {
>>>      > +     struct amdgpu_bo_va *bo_va;
>>>      >       int r;
>>>      >
>>>      >       if (!amdgpu_vm_ready(vm))
>>>      > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct
>>>      amdgpu_device *adev,
>>>      >       if (r)
>>>      >               goto error;
>>>      >
>>>      > -     if (operation == AMDGPU_VA_OP_MAP ||
>>>      > -         operation == AMDGPU_VA_OP_REPLACE) {
>>>      > +     spin_lock(&vm->status_lock);
>>>      > +     while (!list_empty(&vm->dirty)) {
>>>      > +             bo_va = list_first_entry(&vm->dirty, struct
>>>      amdgpu_bo_va,
>>>      > +                                      base.vm_status);
>>>      > +             spin_unlock(&vm->status_lock);
>>>      > +
>>>      >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>      >               if (r)
>>>      >                       goto error;
>>>      > +             spin_lock(&vm->status_lock);
>>>      >       }
>>>      > +     spin_unlock(&vm->status_lock);
>>>      >
>>>      >       r = amdgpu_vm_update_pdes(adev, vm, false);
>>>      >
>>>      > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>      *dev, void *data,
>>>      >               break;
>>>      >       }
>>>      >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
>>>      !amdgpu_vm_debug)
>>>      > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>>      > -  args->operation);
>>>      > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>>      >
>>>      >   error:
>>>      >       drm_exec_fini(&exec);
>>>      > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>      b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>      > index dd6f72e2a1d6..01d31891cd05 100644
>>>      > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>      > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>      > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct
>>>      amdgpu_vm_bo_base *vm_bo, bool evict
>>>      >       spin_unlock(&vm_bo->vm->status_lock);
>>>      >   }
>>>      >
>>>      > +/**
>>>      > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>>      > + *
>>>      > + * @vm_bo: vm_bo which is dirty
>>>      > + *
>>>      > + * State for normal and per VM BOs that are not moved, but have
>>>      new entries in
>>>      > + * bo_va->invalids.
>>>      > + */
>>>      > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>>      > +{
>>>      > +     spin_lock(&vm_bo->vm->status_lock);
>>>      > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>>      > +     spin_unlock(&vm_bo->vm->status_lock);
>>>      > +}
>>>      > +
>>>      >   /**
>>>      >    * amdgpu_vm_bo_moved - vm_bo is moved
>>>      >    *
>>>      > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm
>>>      *vm,
>>>      >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
>>>      base.eviction_status)
>>>      >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>      >
>>>      > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>>>      base.vm_status)
>>>      > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>>>      > +
>>>      >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>>>      base.vm_status)
>>>      >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>      >
>>>      > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct
>>>      amdgpu_device *adev,
>>>      >                       dma_resv_unlock(resv);
>>>      >               spin_lock(&vm->status_lock);
>>>      >       }
>>>      > +
>>>      > +     while (!list_empty(&vm->dirty)) {
>>>      > +             bo_va = list_first_entry(&vm->dirty, struct
>>>      amdgpu_bo_va,
>>>      > +                                      base.vm_status);
>>>      > +             spin_unlock(&vm->status_lock);
>>>      > +
>>>      > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>      > +             if (r)
>>>      > +                     return r;
>>>      > +             spin_lock(&vm->status_lock);
>>>      > +     }
>>>      >       spin_unlock(&vm->status_lock);
>>>      >
>>>      >       return 0;
>>>      > @@ -1476,19 +1505,16 @@ static void
>>>      amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>>      >                                   struct amdgpu_bo_va_mapping
>>>      *mapping)
>>>      >   {
>>>      >       struct amdgpu_vm *vm = bo_va->base.vm;
>>>      > -     struct amdgpu_bo *bo = bo_va->base.bo <http://base.bo>;
>>>      >
>>>      >       mapping->bo_va = bo_va;
>>>      >       list_add(&mapping->list, &bo_va->invalids);
>>>      >       amdgpu_vm_it_insert(mapping, &vm->va);
>>>      > +     if (!bo_va->base.moved)
>>>      > +             amdgpu_vm_bo_dirty(&bo_va->base);
>>>      >
>>>      >       if (mapping->flags & AMDGPU_PTE_PRT)
>>>      >               amdgpu_vm_prt_get(adev);
>>>      >
>>>      > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>      > -         !bo_va->base.moved) {
>>>      > -             amdgpu_vm_bo_moved(&bo_va->base);
>>>      > -     }
>>>      >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>>>      >   }
>>>      >
>>>      > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>      amdgpu_device *adev,
>>>      >                       before->flags = tmp->flags;
>>>      >                       before->bo_va = tmp->bo_va;
>>>      >                       list_add(&before->list,
>>>      &tmp->bo_va->invalids);
>>>      > +                     if (!tmp->bo_va->base.moved)
>>>      > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>      >               }
>>>      >
>>>      >               /* Remember mapping split at the end */
>>>      > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>      amdgpu_device *adev,
>>>      >                       after->flags = tmp->flags;
>>>      >                       after->bo_va = tmp->bo_va;
>>>      >                       list_add(&after->list, &tmp->bo_va->invalids);
>>>      > +                     if (!tmp->bo_va->base.moved)
>>>      > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>      >               }
>>>      >
>>>      >               list_del(&tmp->list);
>>>      > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>      amdgpu_device *adev,
>>>      >
>>>      >       /* Insert partial mapping before the range */
>>>      >       if (!list_empty(&before->list)) {
>>>      > -             struct amdgpu_bo *bo = before->bo_va->base.bo
>>>      <http://base.bo>;
>>>      > -
>>>      >               amdgpu_vm_it_insert(before, &vm->va);
>>>      >               if (before->flags & AMDGPU_PTE_PRT)
>>>      >                       amdgpu_vm_prt_get(adev);
>>>      > -
>>>      > -             if (bo && bo->tbo.base.resv ==
>>>      vm->root.bo->tbo.base.resv &&
>>>      > -                 !before->bo_va->base.moved)
>>>      > -  amdgpu_vm_bo_moved(&before->bo_va->base);
>>>      >       } else {
>>>      >               kfree(before);
>>>      >       }
>>>      >
>>>      >       /* Insert partial mapping after the range */
>>>      >       if (!list_empty(&after->list)) {
>>>      > -             struct amdgpu_bo *bo = after->bo_va->base.bo
>>>      <http://base.bo>;
>>>      > -
>>>      >               amdgpu_vm_it_insert(after, &vm->va);
>>>      >               if (after->flags & AMDGPU_PTE_PRT)
>>>      >                       amdgpu_vm_prt_get(adev);
>>>      > -
>>>      > -             if (bo && bo->tbo.base.resv ==
>>>      vm->root.bo->tbo.base.resv &&
>>>      > -                 !after->bo_va->base.moved)
>>>      > -  amdgpu_vm_bo_moved(&after->bo_va->base);
>>>      >       } else {
>>>      >               kfree(after);
>>>      >       }
>>>      > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device
>>>      *adev, struct amdgpu_vm *vm, int32_t xcp
>>>      >       INIT_LIST_HEAD(&vm->evicted);
>>>      >       INIT_LIST_HEAD(&vm->relocated);
>>>      >       INIT_LIST_HEAD(&vm->moved);
>>>      > +     INIT_LIST_HEAD(&vm->dirty);
>>>      >       INIT_LIST_HEAD(&vm->idle);
>>>      >       INIT_LIST_HEAD(&vm->invalidated);
>>>      >       spin_lock_init(&vm->status_lock);
>>>      > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>      amdgpu_vm *vm, struct seq_file *m)
>>>      >   {
>>>      >       struct amdgpu_bo_va *bo_va, *tmp;
>>>      >       u64 total_idle = 0;
>>>      > +     u64 total_dirty = 0;
>>>      >       u64 total_relocated = 0;
>>>      >       u64 total_moved = 0;
>>>      >       u64 total_invalidated = 0;
>>>      >       u64 total_done = 0;
>>>      >       unsigned int total_idle_objs = 0;
>>>      > +     unsigned int total_dirty_objs = 0;
>>>      >       unsigned int total_relocated_objs = 0;
>>>      >       unsigned int total_moved_objs = 0;
>>>      >       unsigned int total_invalidated_objs = 0;
>>>      > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>      amdgpu_vm *vm, struct seq_file *m)
>>>      >       total_idle_objs = id;
>>>      >       id = 0;
>>>      >
>>>      > +     seq_puts(m, "\tDirty BOs:\n");
>>>      > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>>>      base.vm_status) {
>>>      > +             if (!bo_va->base.bo <http://base.bo>)
>>>      > +                     continue;
>>>      > +             total_dirty += amdgpu_bo_print_info(id++,
>>>      bo_va->base.bo <http://base.bo>, m);
>>>      > +     }
>>>      > +     total_dirty_objs = id;
>>>      > +     id = 0;
>>>      > +
>>>      >       seq_puts(m, "\tRelocated BOs:\n");
>>>      >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>>>      base.vm_status) {
>>>      >               if (!bo_va->base.bo <http://base.bo>)
>>>      > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>      amdgpu_vm *vm, struct seq_file *m)
>>>      >
>>>      >       seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n",
>>>      total_idle,
>>>      >                  total_idle_objs);
>>>      > +     seq_printf(m, "\tTotal dirty size:  %12lld\tobjs:\t%d\n",
>>>      total_dirty,
>>>      > +                total_dirty_objs);
>>>      >       seq_printf(m, "\tTotal relocated size:
>>>       %12lld\tobjs:\t%d\n", total_relocated,
>>>      >                  total_relocated_objs);
>>>      >       seq_printf(m, "\tTotal moved size:  %12lld\tobjs:\t%d\n",
>>>      total_moved,
>>>      > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>      b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>      > index d9ab97eabda9..f91d4fcf80b8 100644
>>>      > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>      > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>      > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>>      >       /* per VM BOs moved, but not yet updated in the PT */
>>>      >       struct list_head        moved;
>>>      >
>>>      > +     /* normal and per VM BOs that are not moved, but have new
>>>      PT entries */
>>>      > +     struct list_head        dirty;
>>>      > +
>>>      >       /* All BOs of this VM not currently in the state machine */
>>>      >       struct list_head        idle;
>>>      >
>>>
Tatsuyuki Ishi Nov. 6, 2023, 7:56 a.m. UTC | #9
> On Oct 31, 2023, at 23:07, Christian König <christian.koenig@amd.com> wrote:
> 
> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>> 
>> 
>> On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
>>> Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>>> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
>>> > BO specified in the GEM ioctl; however, when a binding is split, the
>>> > adjacent bindings also need to be updated. Such updates currently end up
>>> > getting deferred until next submission which causes stalls.
>>> 
>>> Yeah, that is a necessity. The hardware simply doesn't support what you 
>>> try to do here in all cases.
>> 
>> What can the hardware not do here? Is this just needing to wait for TLB flushes before we can free pagetables, can we just delay that?
> 
> On some hardware generations (especially Navi1x, but also everything older than Polaris) you can't invalidate the TLB while it is in use.
> 
> For Polaris and older it just means that you don't have a guarantee that the shader can't access the memory any more. So delaying the free operation helps here.
> 
> But for Navi1x it's a workaround for a hardware bug. If you try to invalidate the TLB while it is in use you can potentially trigger memory accesses to random addresses.
> 
> That's why we still delay TLB invalidations to the next CS and use a new VMID for each submission instead of invalidating the old one.

Thanks for the information. I looked into the VMID allocation logic and I see that if concurrent_flush is false, then we defer any flush (or VMID reuse that requires a flush) until that VMID is idle.

What patch #3 ends up doing is just performing the PT update right away. Any required TLB update is deferred by amdgpu_vm_update_range through the increment of tlb_seq, and amdgpu_vmid.c is responsible for doing the actual TLB flush in a manner that does not trigger the bug.

Can you confirm that this would be fine for the current hardware?

Tatsuyuki.

> 
> I'm currently working on changing that for Navi2x and newer (maybe Vega as well), but this is something you can really only do on some hw generations after validating that it works.
> 
> Regards,
> Christian. 
> 
>>  
>>> 
>>> So this approach won't work in general.
>>> 
>>> Regards,
>>> Christian.
>>> 
>>> >
>>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>>> > BOs, containing all BOs that have pending updates in `invalids`.
>>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
>>> > in the dirty state.
>>> >
>>> > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com <mailto:ishitatsuyuki@gmail.com>>
>>> > ---
>>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>>> >   3 files changed, 63 insertions(+), 24 deletions(-)
>>> >
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > index a1b15d0d6c48..01d3a97248b0 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>> >    * vital here, so they are not reported back to userspace.
>>> >    */
>>> >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>> > -                                 struct amdgpu_vm *vm,
>>> > -                                 struct amdgpu_bo_va *bo_va,
>>> > -                                 uint32_t operation)
>>> > +                                 struct amdgpu_vm *vm)
>>> >   {
>>> > +     struct amdgpu_bo_va *bo_va;
>>> >       int r;
>>> >   
>>> >       if (!amdgpu_vm_ready(vm))
>>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>> >       if (r)
>>> >               goto error;
>>> >   
>>> > -     if (operation == AMDGPU_VA_OP_MAP ||
>>> > -         operation == AMDGPU_VA_OP_REPLACE) {
>>> > +     spin_lock(&vm->status_lock);
>>> > +     while (!list_empty(&vm->dirty)) {
>>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>> > +                                      base.vm_status);
>>> > +             spin_unlock(&vm->status_lock);
>>> > +
>>> >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>>> >               if (r)
>>> >                       goto error;
>>> > +             spin_lock(&vm->status_lock);
>>> >       }
>>> > +     spin_unlock(&vm->status_lock);
>>> >   
>>> >       r = amdgpu_vm_update_pdes(adev, vm, false);
>>> >   
>>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>> >               break;
>>> >       }
>>> >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>>> > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>> > -                                     args->operation);
>>> > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>> >   
>>> >   error:
>>> >       drm_exec_fini(&exec);
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > index dd6f72e2a1d6..01d31891cd05 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>>> >       spin_unlock(&vm_bo->vm->status_lock);
>>> >   }
>>> >   
>>> > +/**
>>> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>> > + *
>>> > + * @vm_bo: vm_bo which is dirty
>>> > + *
>>> > + * State for normal and per VM BOs that are not moved, but have new entries in
>>> > + * bo_va->invalids.
>>> > + */
>>> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>> > +{
>>> > +     spin_lock(&vm_bo->vm->status_lock);
>>> > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>> > +     spin_unlock(&vm_bo->vm->status_lock);
>>> > +}
>>> > +
>>> >   /**
>>> >    * amdgpu_vm_bo_moved - vm_bo is moved
>>> >    *
>>> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>>> >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>> >   
>>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>>> > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>>> > +
>>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>>> >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>> >   
>>> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> >                       dma_resv_unlock(resv);
>>> >               spin_lock(&vm->status_lock);
>>> >       }
>>> > +
>>> > +     while (!list_empty(&vm->dirty)) {
>>> > +             bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>> > +                                      base.vm_status);
>>> > +             spin_unlock(&vm->status_lock);
>>> > +
>>> > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>>> > +             if (r)
>>> > +                     return r;
>>> > +             spin_lock(&vm->status_lock);
>>> > +     }
>>> >       spin_unlock(&vm->status_lock);
>>> >   
>>> >       return 0;
>>> > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>> >                                   struct amdgpu_bo_va_mapping *mapping)
>>> >   {
>>> >       struct amdgpu_vm *vm = bo_va->base.vm;
>>> > -     struct amdgpu_bo *bo = bo_va->base.bo <http://base.bo/>;
>>> >   
>>> >       mapping->bo_va = bo_va;
>>> >       list_add(&mapping->list, &bo_va->invalids);
>>> >       amdgpu_vm_it_insert(mapping, &vm->va);
>>> > +     if (!bo_va->base.moved)
>>> > +             amdgpu_vm_bo_dirty(&bo_va->base);
>>> >   
>>> >       if (mapping->flags & AMDGPU_PTE_PRT)
>>> >               amdgpu_vm_prt_get(adev);
>>> >   
>>> > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -         !bo_va->base.moved) {
>>> > -             amdgpu_vm_bo_moved(&bo_va->base);
>>> > -     }
>>> >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> >   }
>>> >   
>>> > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >                       before->flags = tmp->flags;
>>> >                       before->bo_va = tmp->bo_va;
>>> >                       list_add(&before->list, &tmp->bo_va->invalids);
>>> > +                     if (!tmp->bo_va->base.moved)
>>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>> >               }
>>> >   
>>> >               /* Remember mapping split at the end */
>>> > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >                       after->flags = tmp->flags;
>>> >                       after->bo_va = tmp->bo_va;
>>> >                       list_add(&after->list, &tmp->bo_va->invalids);
>>> > +                     if (!tmp->bo_va->base.moved)
>>> > +                             amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>> >               }
>>> >   
>>> >               list_del(&tmp->list);
>>> > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >   
>>> >       /* Insert partial mapping before the range */
>>> >       if (!list_empty(&before->list)) {
>>> > -             struct amdgpu_bo *bo = before->bo_va->base.bo <http://base.bo/>;
>>> > -
>>> >               amdgpu_vm_it_insert(before, &vm->va);
>>> >               if (before->flags & AMDGPU_PTE_PRT)
>>> >                       amdgpu_vm_prt_get(adev);
>>> > -
>>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -                 !before->bo_va->base.moved)
>>> > -                     amdgpu_vm_bo_moved(&before->bo_va->base);
>>> >       } else {
>>> >               kfree(before);
>>> >       }
>>> >   
>>> >       /* Insert partial mapping after the range */
>>> >       if (!list_empty(&after->list)) {
>>> > -             struct amdgpu_bo *bo = after->bo_va->base.bo <http://base.bo/>;
>>> > -
>>> >               amdgpu_vm_it_insert(after, &vm->va);
>>> >               if (after->flags & AMDGPU_PTE_PRT)
>>> >                       amdgpu_vm_prt_get(adev);
>>> > -
>>> > -             if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -                 !after->bo_va->base.moved)
>>> > -                     amdgpu_vm_bo_moved(&after->bo_va->base);
>>> >       } else {
>>> >               kfree(after);
>>> >       }
>>> > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>> >       INIT_LIST_HEAD(&vm->evicted);
>>> >       INIT_LIST_HEAD(&vm->relocated);
>>> >       INIT_LIST_HEAD(&vm->moved);
>>> > +     INIT_LIST_HEAD(&vm->dirty);
>>> >       INIT_LIST_HEAD(&vm->idle);
>>> >       INIT_LIST_HEAD(&vm->invalidated);
>>> >       spin_lock_init(&vm->status_lock);
>>> > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >   {
>>> >       struct amdgpu_bo_va *bo_va, *tmp;
>>> >       u64 total_idle = 0;
>>> > +     u64 total_dirty = 0;
>>> >       u64 total_relocated = 0;
>>> >       u64 total_moved = 0;
>>> >       u64 total_invalidated = 0;
>>> >       u64 total_done = 0;
>>> >       unsigned int total_idle_objs = 0;
>>> > +     unsigned int total_dirty_objs = 0;
>>> >       unsigned int total_relocated_objs = 0;
>>> >       unsigned int total_moved_objs = 0;
>>> >       unsigned int total_invalidated_objs = 0;
>>> > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >       total_idle_objs = id;
>>> >       id = 0;
>>> >   
>>> > +     seq_puts(m, "\tDirty BOs:\n");
>>> > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>>> > +             if (!bo_va->base.bo <http://base.bo/>)
>>> > +                     continue;
>>> > +             total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo <http://base.bo/>, m);
>>> > +     }
>>> > +     total_dirty_objs = id;
>>> > +     id = 0;
>>> > +
>>> >       seq_puts(m, "\tRelocated BOs:\n");
>>> >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>>> >               if (!bo_va->base.bo <http://base.bo/>)
>>> > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >   
>>> >       seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>>> >                  total_idle_objs);
>>> > +     seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
>>> > +                total_dirty_objs);
>>> >       seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>>> >                  total_relocated_objs);
>>> >       seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > index d9ab97eabda9..f91d4fcf80b8 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>> >       /* per VM BOs moved, but not yet updated in the PT */
>>> >       struct list_head        moved;
>>> >   
>>> > +     /* normal and per VM BOs that are not moved, but have new PT entries */
>>> > +     struct list_head        dirty;
>>> > +
>>> >       /* All BOs of this VM not currently in the state machine */
>>> >       struct list_head        idle;
>>> >   
>>> 
>
Christian König Nov. 6, 2023, 1:33 p.m. UTC | #10
Am 06.11.23 um 08:56 schrieb Tatsuyuki Ishi:
>
>> On Oct 31, 2023, at 23:07, Christian König <christian.koenig@amd.com> 
>> wrote:
>>
>> Am 31.10.23 um 14:59 schrieb Bas Nieuwenhuizen:
>>>
>>>
>>> On Tue, Oct 31, 2023 at 2:57 PM Christian König 
>>> <christian.koenig@amd.com> wrote:
>>>
>>>     Am 31.10.23 um 14:40 schrieb Tatsuyuki Ishi:
>>>     > The current amdgpu_gem_va_update_vm only tries to perform
>>>     updates for the
>>>     > BO specified in the GEM ioctl; however, when a binding is
>>>     split, the
>>>     > adjacent bindings also need to be updated. Such updates
>>>     currently ends up
>>>     > getting deferred until next submission which causes stalls.
>>>
>>>     Yeah, that is a necessity. The hardware simply doesn't support
>>>     what you
>>>     try to do here in all cases.
>>>
>>>
>>> What can the hardware not do here? Is this just needing to wait for 
>>> TLB flushes before we can free pagetables, can we just delay that?
>>
>> On some hardware generations (especially Navi1x, but also everything 
>> older than Polaris) you can't invalidate the TLB while it is in use.
>>
>> For Polaris and older it just means that you don't have a guarantee 
>> that the shader can't access the memory any more. So delaying the 
>> free operation helps here.
>>
>> But for Navi1x it's a workaround for a hardware bug. If you try to 
>> invalidate the TLB while it is in use you can potentially trigger 
>> memory accesses to random addresses.
>>
>> That's why we still delay TLB invalidations to the next CS and use a 
>> new VMID for each submission instead of invalidating the old one.
>
> Thanks for the information. I looked into the VMID allocation logic 
> and I see that if concurrent_flush is false, then we defer any flush 
> (or VMID reuse that requires a flush) until that VMID is idle.
>
> What patch #3 ends up doing is just performing the PT update right 
> away. Any required TLB update is deferred by amdgpu_vm_update_range 
> through the increment of tlb_seq, and amdgpu_vmid.c is responsible for 
> doing the actual TLB flush in a manner that does not trigger the bug.
>
> Can you confirm that this would be fine for the current hardware?

Yeah, that should work. I'm just thinking about the UAPI a bit; we should 
probably improve this to use something like a drm_syncobj instead of 
just a flag to be future-proof.

Christian.

>
> Tatsuyuki.
>
>>
>> I'm currently working on changing that for Navi2x and newer (maybe 
>> Vega as well), but this is something you can really only do on some 
>> hw generations after validating that it works.
>>
>> Regards,
>> Christian.
>>
>>>
>>>
>>>     So this approach won't work in general.
>>>
>>>     Regards,
>>>     Christian.
>>>
>>>     >
>>>     > Introduce a new state "dirty", shared between per-VM BOs and
>>>     traditional
>>>     > BOs, containing all BOs that have pending updates in `invalids`.
>>>     > amdgpu_gem_va_update_vm will now simply flush any pending
>>>     updates for BOs
>>>     > in the dirty state.
>>>     >
>>>     > Signed-off-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
>>>     > ---
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66
>>>     ++++++++++++++++++-------
>>>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 3 ++
>>>     >   3 files changed, 63 insertions(+), 24 deletions(-)
>>>     >
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > index a1b15d0d6c48..01d3a97248b0 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>     > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct
>>>     drm_device *dev, void *data,
>>>     >    * vital here, so they are not reported back to userspace.
>>>     >    */
>>>     >   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>     > -                                 struct amdgpu_vm *vm,
>>>     > -                                 struct amdgpu_bo_va *bo_va,
>>>     > -                                 uint32_t operation)
>>>     > +                                 struct amdgpu_vm *vm)
>>>     >   {
>>>     > +     struct amdgpu_bo_va *bo_va;
>>>     >       int r;
>>>     >
>>>     >       if (!amdgpu_vm_ready(vm))
>>>     > @@ -617,12 +616,18 @@ static void
>>>     amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>>     >       if (r)
>>>     >               goto error;
>>>     >
>>>     > -     if (operation == AMDGPU_VA_OP_MAP ||
>>>     > -         operation == AMDGPU_VA_OP_REPLACE) {
>>>     > +     spin_lock(&vm->status_lock);
>>>     > +     while (!list_empty(&vm->dirty)) {
>>>     > +             bo_va = list_first_entry(&vm->dirty, struct
>>>     amdgpu_bo_va,
>>>     > + base.vm_status);
>>>     > +  spin_unlock(&vm->status_lock);
>>>     > +
>>>     >               r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>     >               if (r)
>>>     >                       goto error;
>>>     > +  spin_lock(&vm->status_lock);
>>>     >       }
>>>     > +     spin_unlock(&vm->status_lock);
>>>     >
>>>     >       r = amdgpu_vm_update_pdes(adev, vm, false);
>>>     >
>>>     > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>     *dev, void *data,
>>>     >               break;
>>>     >       }
>>>     >       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
>>>     !amdgpu_vm_debug)
>>>     > -             amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>>     > -  args->operation);
>>>     > +             amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>>     >
>>>     >   error:
>>>     >       drm_exec_fini(&exec);
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > index dd6f72e2a1d6..01d31891cd05 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>     > @@ -191,6 +191,21 @@ static void
>>>     amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>>>     >  spin_unlock(&vm_bo->vm->status_lock);
>>>     >   }
>>>     >
>>>     > +/**
>>>     > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>>     > + *
>>>     > + * @vm_bo: vm_bo which is dirty
>>>     > + *
>>>     > + * State for normal and per VM BOs that are not moved, but
>>>     have new entries in
>>>     > + * bo_va->invalids.
>>>     > + */
>>>     > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>>     > +{
>>>     > +  spin_lock(&vm_bo->vm->status_lock);
>>>     > +     list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>>     > +  spin_unlock(&vm_bo->vm->status_lock);
>>>     > +}
>>>     > +
>>>     >   /**
>>>     >    * amdgpu_vm_bo_moved - vm_bo is moved
>>>     >    *
>>>     > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct
>>>     amdgpu_vm *vm,
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->evicted,
>>>     base.eviction_status)
>>>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     >
>>>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>>>     base.vm_status)
>>>     > +             amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     > +
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>>>     base.vm_status)
>>>     >               amdgpu_vm_bo_get_memory(bo_va, stats);
>>>     >
>>>     > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct
>>>     amdgpu_device *adev,
>>>     >                       dma_resv_unlock(resv);
>>>     >  spin_lock(&vm->status_lock);
>>>     >       }
>>>     > +
>>>     > +     while (!list_empty(&vm->dirty)) {
>>>     > +             bo_va = list_first_entry(&vm->dirty, struct
>>>     amdgpu_bo_va,
>>>     > + base.vm_status);
>>>     > +  spin_unlock(&vm->status_lock);
>>>     > +
>>>     > +             r = amdgpu_vm_bo_update(adev, bo_va, false);
>>>     > +             if (r)
>>>     > +                     return r;
>>>     > +  spin_lock(&vm->status_lock);
>>>     > +     }
>>>     >       spin_unlock(&vm->status_lock);
>>>     >
>>>     >       return 0;
>>>     > @@ -1476,19 +1505,16 @@ static void
>>>     amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>>     >                                   struct amdgpu_bo_va_mapping
>>>     *mapping)
>>>     >   {
>>>     >       struct amdgpu_vm *vm = bo_va->base.vm;
>>>     > -     struct amdgpu_bo *bo = bo_va->base.bo <http://base.bo/>;
>>>     >
>>>     >       mapping->bo_va = bo_va;
>>>     >       list_add(&mapping->list, &bo_va->invalids);
>>>     >       amdgpu_vm_it_insert(mapping, &vm->va);
>>>     > +     if (!bo_va->base.moved)
>>>     > +  amdgpu_vm_bo_dirty(&bo_va->base);
>>>     >
>>>     >       if (mapping->flags & AMDGPU_PTE_PRT)
>>>     >               amdgpu_vm_prt_get(adev);
>>>     >
>>>     > -     if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>>     > -         !bo_va->base.moved) {
>>>     > -  amdgpu_vm_bo_moved(&bo_va->base);
>>>     > -     }
>>>     >       trace_amdgpu_vm_bo_map(bo_va, mapping);
>>>     >   }
>>>     >
>>>     > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>     amdgpu_device *adev,
>>>     >                       before->flags = tmp->flags;
>>>     >                       before->bo_va = tmp->bo_va;
>>>     >  list_add(&before->list, &tmp->bo_va->invalids);
>>>     > +                     if (!tmp->bo_va->base.moved)
>>>     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>     >               }
>>>     >
>>>     >               /* Remember mapping split at the end */
>>>     > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>     amdgpu_device *adev,
>>>     >                       after->flags = tmp->flags;
>>>     >                       after->bo_va = tmp->bo_va;
>>>     >  list_add(&after->list, &tmp->bo_va->invalids);
>>>     > +                     if (!tmp->bo_va->base.moved)
>>>     > +  amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>>     >               }
>>>     >
>>>     >               list_del(&tmp->list);
>>>     > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct
>>>     amdgpu_device *adev,
>>>     >
>>>     >       /* Insert partial mapping before the range */
>>>     >       if (!list_empty(&before->list)) {
>>>     > -             struct amdgpu_bo *bo = before->bo_va->base.bo
>>>     <http://base.bo/>;
>>>     > -
>>>     >               amdgpu_vm_it_insert(before, &vm->va);
>>>     >               if (before->flags & AMDGPU_PTE_PRT)
>>>     >  amdgpu_vm_prt_get(adev);
>>>     > -
>>>     > -             if (bo && bo->tbo.base.resv ==
>>>     vm->root.bo->tbo.base.resv &&
>>>     > -  !before->bo_va->base.moved)
>>>     > -  amdgpu_vm_bo_moved(&before->bo_va->base);
>>>     >       } else {
>>>     >               kfree(before);
>>>     >       }
>>>     >
>>>     >       /* Insert partial mapping after the range */
>>>     >       if (!list_empty(&after->list)) {
>>>     > -             struct amdgpu_bo *bo = after->bo_va->base.bo
>>>     <http://base.bo/>;
>>>     > -
>>>     >               amdgpu_vm_it_insert(after, &vm->va);
>>>     >               if (after->flags & AMDGPU_PTE_PRT)
>>>     >  amdgpu_vm_prt_get(adev);
>>>     > -
>>>     > -             if (bo && bo->tbo.base.resv ==
>>>     vm->root.bo->tbo.base.resv &&
>>>     > -  !after->bo_va->base.moved)
>>>     > -  amdgpu_vm_bo_moved(&after->bo_va->base);
>>>     >       } else {
>>>     >               kfree(after);
>>>     >       }
>>>     > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device
>>>     *adev, struct amdgpu_vm *vm, int32_t xcp
>>>     >       INIT_LIST_HEAD(&vm->evicted);
>>>     >       INIT_LIST_HEAD(&vm->relocated);
>>>     >       INIT_LIST_HEAD(&vm->moved);
>>>     > +     INIT_LIST_HEAD(&vm->dirty);
>>>     >       INIT_LIST_HEAD(&vm->idle);
>>>     >  INIT_LIST_HEAD(&vm->invalidated);
>>>     >  spin_lock_init(&vm->status_lock);
>>>     > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>     amdgpu_vm *vm, struct seq_file *m)
>>>     >   {
>>>     >       struct amdgpu_bo_va *bo_va, *tmp;
>>>     >       u64 total_idle = 0;
>>>     > +     u64 total_dirty = 0;
>>>     >       u64 total_relocated = 0;
>>>     >       u64 total_moved = 0;
>>>     >       u64 total_invalidated = 0;
>>>     >       u64 total_done = 0;
>>>     >       unsigned int total_idle_objs = 0;
>>>     > +     unsigned int total_dirty_objs = 0;
>>>     >       unsigned int total_relocated_objs = 0;
>>>     >       unsigned int total_moved_objs = 0;
>>>     >       unsigned int total_invalidated_objs = 0;
>>>     > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>     amdgpu_vm *vm, struct seq_file *m)
>>>     >       total_idle_objs = id;
>>>     >       id = 0;
>>>     >
>>>     > +     seq_puts(m, "\tDirty BOs:\n");
>>>     > +     list_for_each_entry_safe(bo_va, tmp, &vm->dirty,
>>>     base.vm_status) {
>>>     > +             if (!bo_va->base.bo <http://base.bo/>)
>>>     > +                     continue;
>>>     > +             total_dirty += amdgpu_bo_print_info(id++,
>>>     bo_va->base.bo <http://base.bo/>, m);
>>>     > +     }
>>>     > +     total_dirty_objs = id;
>>>     > +     id = 0;
>>>     > +
>>>     >       seq_puts(m, "\tRelocated BOs:\n");
>>>     >       list_for_each_entry_safe(bo_va, tmp, &vm->relocated,
>>>     base.vm_status) {
>>>     >               if (!bo_va->base.bo <http://base.bo/>)
>>>     > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct
>>>     amdgpu_vm *vm, struct seq_file *m)
>>>     >
>>>     >       seq_printf(m, "\tTotal idle size:  
>>>     %12lld\tobjs:\t%d\n", total_idle,
>>>     >                  total_idle_objs);
>>>     > +     seq_printf(m, "\tTotal dirty size:
>>>      %12lld\tobjs:\t%d\n", total_dirty,
>>>     > +                total_dirty_objs);
>>>     >       seq_printf(m, "\tTotal relocated size:
>>>      %12lld\tobjs:\t%d\n", total_relocated,
>>>     >                  total_relocated_objs);
>>>     >       seq_printf(m, "\tTotal moved size:
>>>      %12lld\tobjs:\t%d\n", total_moved,
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > index d9ab97eabda9..f91d4fcf80b8 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>     > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>>     >       /* per VM BOs moved, but not yet updated in the PT */
>>>     >       struct list_head        moved;
>>>     >
>>>     > +     /* normal and per VM BOs that are not moved, but have
>>>     new PT entries */
>>>     > +     struct list_head        dirty;
>>>     > +
>>>     >       /* All BOs of this VM not currently in the state machine */
>>>     >       struct list_head        idle;
>>>     >
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1b15d0d6c48..01d3a97248b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -604,10 +604,9 @@  int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm,
-				    struct amdgpu_bo_va *bo_va,
-				    uint32_t operation)
+				    struct amdgpu_vm *vm)
 {
+	struct amdgpu_bo_va *bo_va;
 	int r;
 
 	if (!amdgpu_vm_ready(vm))
@@ -617,12 +616,18 @@  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	if (operation == AMDGPU_VA_OP_MAP ||
-	    operation == AMDGPU_VA_OP_REPLACE) {
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->dirty)) {
+		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+					 base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			goto error;
+		spin_lock(&vm->status_lock);
 	}
+	spin_unlock(&vm->status_lock);
 
 	r = amdgpu_vm_update_pdes(adev, vm, false);
 
@@ -792,8 +797,7 @@  int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
-		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-					args->operation);
+		amdgpu_gem_va_update_vm(adev, &fpriv->vm);
 
 error:
 	drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd6f72e2a1d6..01d31891cd05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -191,6 +191,21 @@  static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
 	spin_unlock(&vm_bo->vm->status_lock);
 }
 
+/**
+ * amdgpu_vm_bo_dirty - vm_bo is dirty
+ *
+ * @vm_bo: vm_bo which is dirty
+ *
+ * State for normal and per VM BOs that are not moved, but have new entries in
+ * bo_va->invalids.
+ */
+static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
+{
+	spin_lock(&vm_bo->vm->status_lock);
+	list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
+	spin_unlock(&vm_bo->vm->status_lock);
+}
+
 /**
  * amdgpu_vm_bo_moved - vm_bo is moved
  *
@@ -1042,6 +1057,9 @@  void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
 	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
 		amdgpu_vm_bo_get_memory(bo_va, stats);
 
+	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
+		amdgpu_vm_bo_get_memory(bo_va, stats);
+
 	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
 		amdgpu_vm_bo_get_memory(bo_va, stats);
 
@@ -1411,6 +1429,17 @@  int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			dma_resv_unlock(resv);
 		spin_lock(&vm->status_lock);
 	}
+
+	while (!list_empty(&vm->dirty)) {
+		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+					 base.vm_status);
+		spin_unlock(&vm->status_lock);
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+		spin_lock(&vm->status_lock);
+	}
 	spin_unlock(&vm->status_lock);
 
 	return 0;
@@ -1476,19 +1505,16 @@  static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 				    struct amdgpu_bo_va_mapping *mapping)
 {
 	struct amdgpu_vm *vm = bo_va->base.vm;
-	struct amdgpu_bo *bo = bo_va->base.bo;
 
 	mapping->bo_va = bo_va;
 	list_add(&mapping->list, &bo_va->invalids);
 	amdgpu_vm_it_insert(mapping, &vm->va);
+	if (!bo_va->base.moved)
+		amdgpu_vm_bo_dirty(&bo_va->base);
 
 	if (mapping->flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
 
-	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-	    !bo_va->base.moved) {
-		amdgpu_vm_bo_moved(&bo_va->base);
-	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
 
@@ -1725,6 +1751,8 @@  int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 			before->flags = tmp->flags;
 			before->bo_va = tmp->bo_va;
 			list_add(&before->list, &tmp->bo_va->invalids);
+			if (!tmp->bo_va->base.moved)
+				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
 		}
 
 		/* Remember mapping split at the end */
@@ -1736,6 +1764,8 @@  int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 			after->flags = tmp->flags;
 			after->bo_va = tmp->bo_va;
 			list_add(&after->list, &tmp->bo_va->invalids);
+			if (!tmp->bo_va->base.moved)
+				amdgpu_vm_bo_dirty(&tmp->bo_va->base);
 		}
 
 		list_del(&tmp->list);
@@ -1761,30 +1791,18 @@  int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
 	/* Insert partial mapping before the range */
 	if (!list_empty(&before->list)) {
-		struct amdgpu_bo *bo = before->bo_va->base.bo;
-
 		amdgpu_vm_it_insert(before, &vm->va);
 		if (before->flags & AMDGPU_PTE_PRT)
 			amdgpu_vm_prt_get(adev);
-
-		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-		    !before->bo_va->base.moved)
-			amdgpu_vm_bo_moved(&before->bo_va->base);
 	} else {
 		kfree(before);
 	}
 
 	/* Insert partial mapping after the range */
 	if (!list_empty(&after->list)) {
-		struct amdgpu_bo *bo = after->bo_va->base.bo;
-
 		amdgpu_vm_it_insert(after, &vm->va);
 		if (after->flags & AMDGPU_PTE_PRT)
 			amdgpu_vm_prt_get(adev);
-
-		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-		    !after->bo_va->base.moved)
-			amdgpu_vm_bo_moved(&after->bo_va->base);
 	} else {
 		kfree(after);
 	}
@@ -2136,6 +2154,7 @@  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
 	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
+	INIT_LIST_HEAD(&vm->dirty);
 	INIT_LIST_HEAD(&vm->idle);
 	INIT_LIST_HEAD(&vm->invalidated);
 	spin_lock_init(&vm->status_lock);
@@ -2648,11 +2667,13 @@  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
 {
 	struct amdgpu_bo_va *bo_va, *tmp;
 	u64 total_idle = 0;
+	u64 total_dirty = 0;
 	u64 total_relocated = 0;
 	u64 total_moved = 0;
 	u64 total_invalidated = 0;
 	u64 total_done = 0;
 	unsigned int total_idle_objs = 0;
+	unsigned int total_dirty_objs = 0;
 	unsigned int total_relocated_objs = 0;
 	unsigned int total_moved_objs = 0;
 	unsigned int total_invalidated_objs = 0;
@@ -2669,6 +2690,15 @@  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
 	total_idle_objs = id;
 	id = 0;
 
+	seq_puts(m, "\tDirty BOs:\n");
+	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
+		if (!bo_va->base.bo)
+			continue;
+		total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+	}
+	total_dirty_objs = id;
+	id = 0;
+
 	seq_puts(m, "\tRelocated BOs:\n");
 	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
 		if (!bo_va->base.bo)
@@ -2707,6 +2737,8 @@  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
 
 	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
 		   total_idle_objs);
+	seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
+		   total_dirty_objs);
 	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
 		   total_relocated_objs);
 	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d9ab97eabda9..f91d4fcf80b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -276,6 +276,9 @@  struct amdgpu_vm {
 	/* per VM BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
+	/* normal and per VM BOs that are not moved, but have new PT entries */
+	struct list_head	dirty;
+
 	/* All BOs of this VM not currently in the state machine */
 	struct list_head	idle;