
[v4,10/15] drm/shmem-helper: Take reservation lock instead of drm_gem_shmem locks

Message ID 20220417223707.157113-11-dmitry.osipenko@collabora.com (mailing list archive)
State New, archived
Series Add generic memory shrinker to VirtIO-GPU and Panfrost DRM drivers

Commit Message

Dmitry Osipenko April 17, 2022, 10:37 p.m. UTC
Replace drm_gem_shmem locks with the reservation lock to make GEM
locking more consistent.

Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
protected by separate locks; now it's the same lock, but that doesn't
make any difference for the current GEM SHMEM users. Only the Panfrost
and Lima drivers use vmap(), and they do it in slow code paths, hence
there was no practical justification for using a separate lock in
vmap().

Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c  | 38 ++++++++++++-------------
 drivers/gpu/drm/lima/lima_gem.c         |  8 +++---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 15 ++++++----
 include/drm/drm_gem_shmem_helper.h      | 10 -------
 4 files changed, 31 insertions(+), 40 deletions(-)

Comments

Thomas Zimmermann April 18, 2022, 6:38 p.m. UTC | #1
Hi

Am 18.04.22 um 00:37 schrieb Dmitry Osipenko:
> Replace drm_gem_shmem locks with the reservation lock to make GEM
> lockings more consistent.
> 
> Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
> protected by separate locks, now it's the same lock, but it doesn't
> make any difference for the current GEM SHMEM users. Only Panfrost
> and Lima drivers use vmap() and they do it in the slow code paths,
> hence there was no practical justification for the usage of separate
> lock in the vmap().
> 
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>   drivers/gpu/drm/drm_gem_shmem_helper.c  | 38 ++++++++++++-------------
>   drivers/gpu/drm/lima/lima_gem.c         |  8 +++---
>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 15 ++++++----
>   include/drm/drm_gem_shmem_helper.h      | 10 -------
>   4 files changed, 31 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index 30ee46348a99..3ecef571eff3 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -86,8 +86,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
>   	if (ret)
>   		goto err_release;
>   
> -	mutex_init(&shmem->pages_lock);
> -	mutex_init(&shmem->vmap_lock);
>   	INIT_LIST_HEAD(&shmem->madv_list);
>   
>   	if (!private) {
> @@ -157,8 +155,6 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
>   	WARN_ON(shmem->pages_use_count);
>   
>   	drm_gem_object_release(obj);
> -	mutex_destroy(&shmem->pages_lock);
> -	mutex_destroy(&shmem->vmap_lock);
>   	kfree(shmem);
>   }
>   EXPORT_SYMBOL_GPL(drm_gem_shmem_free);
> @@ -209,11 +205,11 @@ int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem)
>   
>   	WARN_ON(shmem->base.import_attach);
>   
> -	ret = mutex_lock_interruptible(&shmem->pages_lock);
> +	ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
>   	if (ret)
>   		return ret;
>   	ret = drm_gem_shmem_get_pages_locked(shmem);
> -	mutex_unlock(&shmem->pages_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   
>   	return ret;
>   }
> @@ -248,9 +244,9 @@ static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
>    */
>   void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem)
>   {
> -	mutex_lock(&shmem->pages_lock);
> +	dma_resv_lock(shmem->base.resv, NULL);
>   	drm_gem_shmem_put_pages_locked(shmem);
> -	mutex_unlock(&shmem->pages_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   }
>   EXPORT_SYMBOL(drm_gem_shmem_put_pages);
>   
> @@ -310,7 +306,7 @@ static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
>   	} else {
>   		pgprot_t prot = PAGE_KERNEL;
>   
> -		ret = drm_gem_shmem_get_pages(shmem);
> +		ret = drm_gem_shmem_get_pages_locked(shmem);
>   		if (ret)
>   			goto err_zero_use;
>   
> @@ -360,11 +356,11 @@ int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem,
>   {
>   	int ret;
>   
> -	ret = mutex_lock_interruptible(&shmem->vmap_lock);
> +	ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
>   	if (ret)
>   		return ret;
>   	ret = drm_gem_shmem_vmap_locked(shmem, map);

Within drm_gem_shmem_vmap_locked(), there's a call to dma_buf_vmap() for
imported pages. If the exporter side also holds/acquires the same
reservation lock as our object, the whole thing can deadlock. We cannot
move dma_buf_vmap() out of the critical section, because we still need
to increment the reference counter. I honestly don't know how to easily
fix this problem. There's a TODO item about replacing these locks at [1].
Since Daniel suggested this patch, we should talk to him about the issue.

Best regards
Thomas

[1] 
https://www.kernel.org/doc/html/latest/gpu/todo.html#move-buffer-object-locking-to-dma-resv-lock
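
The deadlock described above, sketched as a call chain (an editorial
illustration only; it assumes a PRIME-imported object, where obj->resv
points at the exporter's dma-buf reservation object):

  drm_gem_shmem_vmap(shmem)
    dma_resv_lock(shmem->base.resv)            /* == dmabuf->resv for imports */
    drm_gem_shmem_vmap_locked(shmem)
      dma_buf_vmap(obj->import_attach->dmabuf)
        exporter's ->vmap()
          dma_resv_lock(dmabuf->resv)          /* same lock -> deadlock */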



> -	mutex_unlock(&shmem->vmap_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   
>   	return ret;
>   }
> @@ -385,7 +381,7 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
>   		dma_buf_vunmap(obj->import_attach->dmabuf, map);
>   	} else {
>   		vunmap(shmem->vaddr);
> -		drm_gem_shmem_put_pages(shmem);
> +		drm_gem_shmem_put_pages_locked(shmem);
>   	}
>   
>   	shmem->vaddr = NULL;
> @@ -406,9 +402,11 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
>   void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem,
>   			  struct iosys_map *map)
>   {
> -	mutex_lock(&shmem->vmap_lock);
> +	dma_resv_lock(shmem->base.resv, NULL);
>   	drm_gem_shmem_vunmap_locked(shmem, map);
> -	mutex_unlock(&shmem->vmap_lock);
> +	dma_resv_unlock(shmem->base.resv);
> +
> +	drm_gem_shmem_update_purgeable_status(shmem);
>   }
>   EXPORT_SYMBOL(drm_gem_shmem_vunmap);
>   
> @@ -442,14 +440,14 @@ drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
>    */
>   int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv)
>   {
> -	mutex_lock(&shmem->pages_lock);
> +	dma_resv_lock(shmem->base.resv, NULL);
>   
>   	if (shmem->madv >= 0)
>   		shmem->madv = madv;
>   
>   	madv = shmem->madv;
>   
> -	mutex_unlock(&shmem->pages_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   
>   	return (madv >= 0);
>   }
> @@ -487,10 +485,10 @@ EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
>   
>   bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem)
>   {
> -	if (!mutex_trylock(&shmem->pages_lock))
> +	if (!dma_resv_trylock(shmem->base.resv))
>   		return false;
>   	drm_gem_shmem_purge_locked(shmem);
> -	mutex_unlock(&shmem->pages_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   
>   	return true;
>   }
> @@ -549,7 +547,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
>   	/* We don't use vmf->pgoff since that has the fake offset */
>   	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
>   
> -	mutex_lock(&shmem->pages_lock);
> +	dma_resv_lock(shmem->base.resv, NULL);
>   
>   	if (page_offset >= num_pages ||
>   	    WARN_ON_ONCE(!shmem->pages) ||
> @@ -561,7 +559,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
>   		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
>   	}
>   
> -	mutex_unlock(&shmem->pages_lock);
> +	dma_resv_unlock(shmem->base.resv);
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> index 0f1ca0b0db49..5008f0c2428f 100644
> --- a/drivers/gpu/drm/lima/lima_gem.c
> +++ b/drivers/gpu/drm/lima/lima_gem.c
> @@ -34,7 +34,7 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
>   
>   	new_size = min(new_size, bo->base.base.size);
>   
> -	mutex_lock(&bo->base.pages_lock);
> +	dma_resv_lock(bo->base.base.resv, NULL);
>   
>   	if (bo->base.pages) {
>   		pages = bo->base.pages;
> @@ -42,7 +42,7 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
>   		pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
>   				       sizeof(*pages), GFP_KERNEL | __GFP_ZERO);
>   		if (!pages) {
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(bo->base.base.resv);
>   			return -ENOMEM;
>   		}
>   
> @@ -56,13 +56,13 @@ int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
>   		struct page *page = shmem_read_mapping_page(mapping, i);
>   
>   		if (IS_ERR(page)) {
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(bo->base.base.resv);
>   			return PTR_ERR(page);
>   		}
>   		pages[i] = page;
>   	}
>   
> -	mutex_unlock(&bo->base.pages_lock);
> +	dma_resv_unlock(bo->base.base.resv);
>   
>   	ret = sg_alloc_table_from_pages(&sgt, pages, i, 0,
>   					new_size, GFP_KERNEL);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index d3f82b26a631..404b8f67e2df 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -424,6 +424,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>   	struct panfrost_gem_mapping *bomapping;
>   	struct panfrost_gem_object *bo;
>   	struct address_space *mapping;
> +	struct drm_gem_object *obj;
>   	pgoff_t page_offset;
>   	struct sg_table *sgt;
>   	struct page **pages;
> @@ -446,13 +447,15 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>   	page_offset = addr >> PAGE_SHIFT;
>   	page_offset -= bomapping->mmnode.start;
>   
> -	mutex_lock(&bo->base.pages_lock);
> +	obj = &bo->base.base;
> +
> +	dma_resv_lock(obj->resv, NULL);
>   
>   	if (!bo->base.pages) {
>   		bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
>   				     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
>   		if (!bo->sgts) {
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(obj->resv);
>   			ret = -ENOMEM;
>   			goto err_bo;
>   		}
> @@ -462,7 +465,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>   		if (!pages) {
>   			kvfree(bo->sgts);
>   			bo->sgts = NULL;
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(obj->resv);
>   			ret = -ENOMEM;
>   			goto err_bo;
>   		}
> @@ -472,7 +475,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>   		pages = bo->base.pages;
>   		if (pages[page_offset]) {
>   			/* Pages are already mapped, bail out. */
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(obj->resv);
>   			goto out;
>   		}
>   	}
> @@ -483,13 +486,13 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>   	for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
>   		pages[i] = shmem_read_mapping_page(mapping, i);
>   		if (IS_ERR(pages[i])) {
> -			mutex_unlock(&bo->base.pages_lock);
> +			dma_resv_unlock(obj->resv);
>   			ret = PTR_ERR(pages[i]);
>   			goto err_pages;
>   		}
>   	}
>   
> -	mutex_unlock(&bo->base.pages_lock);
> +	dma_resv_unlock(obj->resv);
>   
>   	sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
>   	ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> index d0a57853c188..70889533962a 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -26,11 +26,6 @@ struct drm_gem_shmem_object {
>   	 */
>   	struct drm_gem_object base;
>   
> -	/**
> -	 * @pages_lock: Protects the page table and use count
> -	 */
> -	struct mutex pages_lock;
> -
>   	/**
>   	 * @pages: Page table
>   	 */
> @@ -79,11 +74,6 @@ struct drm_gem_shmem_object {
>   	 */
>   	struct sg_table *sgt;
>   
> -	/**
> -	 * @vmap_lock: Protects the vmap address and use count
> -	 */
> -	struct mutex vmap_lock;
> -
>   	/**
>   	 * @vaddr: Kernel virtual address of the backing memory
>   	 */
Dmitry Osipenko April 18, 2022, 7:18 p.m. UTC | #2
Hello,

On 4/18/22 21:38, Thomas Zimmermann wrote:
> Hi
> 
> Am 18.04.22 um 00:37 schrieb Dmitry Osipenko:
>> Replace drm_gem_shmem locks with the reservation lock to make GEM
>> lockings more consistent.
>>
>> Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
>> protected by separate locks, now it's the same lock, but it doesn't
>> make any difference for the current GEM SHMEM users. Only Panfrost
>> and Lima drivers use vmap() and they do it in the slow code paths,
>> hence there was no practical justification for the usage of separate
>> lock in the vmap().
>>
>> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>> ---
...
>>   @@ -310,7 +306,7 @@ static int drm_gem_shmem_vmap_locked(struct
>> drm_gem_shmem_object *shmem,
>>       } else {
>>           pgprot_t prot = PAGE_KERNEL;
>>   -        ret = drm_gem_shmem_get_pages(shmem);
>> +        ret = drm_gem_shmem_get_pages_locked(shmem);
>>           if (ret)
>>               goto err_zero_use;
>>   @@ -360,11 +356,11 @@ int drm_gem_shmem_vmap(struct
>> drm_gem_shmem_object *shmem,
>>   {
>>       int ret;
>>   -    ret = mutex_lock_interruptible(&shmem->vmap_lock);
>> +    ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
>>       if (ret)
>>           return ret;
>>       ret = drm_gem_shmem_vmap_locked(shmem, map);
> 
> Within drm_gem_shmem_vmap_locked(), there's a call to dma_buf_vmap() for
> imported pages. If the exporter side also holds/acquires the same
> reservation lock as our object, the whole thing can deadlock. We cannot
> move dma_buf_vmap() out of the CS, because we still need to increment
> the reference counter. I honestly don't know how to easily fix this
> problem. There's a TODO item about replacing these locks at [1]. As
> Daniel suggested this patch, we should talk to him about the issue.
> 
> Best regards
> Thomas
> 
> [1]
> https://www.kernel.org/doc/html/latest/gpu/todo.html#move-buffer-object-locking-to-dma-resv-lock

Indeed, good catch! Perhaps we could simply use a separate lock for the
vmapping of the *imported* GEMs? The vmap_use_count is used only by
vmap/vunmap, so it doesn't matter which lock is used by these functions
in the case of imported GEMs since we only need to protect the
vmap_use_count.
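
A minimal sketch of that idea (illustrative only; the vmap_import_lock
mutex is made up here and would be used solely on the import path, where
vmap_use_count is the only state that needs protection):

int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem,
		       struct iosys_map *map)
{
	int ret;

	if (shmem->base.import_attach) {
		/* Imported GEM: don't touch the exporter-owned resv lock. */
		mutex_lock(&shmem->vmap_import_lock);
		ret = drm_gem_shmem_vmap_locked(shmem, map);
		mutex_unlock(&shmem->vmap_import_lock);
	} else {
		ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
		if (ret)
			return ret;
		ret = drm_gem_shmem_vmap_locked(shmem, map);
		dma_resv_unlock(shmem->base.resv);
	}

	return ret;
}
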
Daniel Vetter April 27, 2022, 2:50 p.m. UTC | #3
On Mon, Apr 18, 2022 at 10:18:54PM +0300, Dmitry Osipenko wrote:
> Hello,
> 
> On 4/18/22 21:38, Thomas Zimmermann wrote:
> > Hi
> > 
> > Am 18.04.22 um 00:37 schrieb Dmitry Osipenko:
> >> Replace drm_gem_shmem locks with the reservation lock to make GEM
> >> lockings more consistent.
> >>
> >> Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
> >> protected by separate locks, now it's the same lock, but it doesn't
> >> make any difference for the current GEM SHMEM users. Only Panfrost
> >> and Lima drivers use vmap() and they do it in the slow code paths,
> >> hence there was no practical justification for the usage of separate
> >> lock in the vmap().
> >>
> >> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> >> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> >> ---
> ...
> >>   @@ -310,7 +306,7 @@ static int drm_gem_shmem_vmap_locked(struct
> >> drm_gem_shmem_object *shmem,
> >>       } else {
> >>           pgprot_t prot = PAGE_KERNEL;
> >>   -        ret = drm_gem_shmem_get_pages(shmem);
> >> +        ret = drm_gem_shmem_get_pages_locked(shmem);
> >>           if (ret)
> >>               goto err_zero_use;
> >>   @@ -360,11 +356,11 @@ int drm_gem_shmem_vmap(struct
> >> drm_gem_shmem_object *shmem,
> >>   {
> >>       int ret;
> >>   -    ret = mutex_lock_interruptible(&shmem->vmap_lock);
> >> +    ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
> >>       if (ret)
> >>           return ret;
> >>       ret = drm_gem_shmem_vmap_locked(shmem, map);
> > 
> > Within drm_gem_shmem_vmap_locked(), there's a call to dma_buf_vmap() for
> > imported pages. If the exporter side also holds/acquires the same
> > reservation lock as our object, the whole thing can deadlock. We cannot
> > move dma_buf_vmap() out of the CS, because we still need to increment
> > the reference counter. I honestly don't know how to easily fix this
> > problem. There's a TODO item about replacing these locks at [1]. As
> > Daniel suggested this patch, we should talk to him about the issue.
> > 
> > Best regards
> > Thomas
> > 
> > [1]
> > https://www.kernel.org/doc/html/latest/gpu/todo.html#move-buffer-object-locking-to-dma-resv-lock
> 
> Indeed, good catch! Perhaps we could simply use a separate lock for the
> vmapping of the *imported* GEMs? The vmap_use_count is used only by
> vmap/vunmap, so it doesn't matter which lock is used by these functions
> in the case of imported GEMs since we only need to protect the
> vmap_use_count.

Apologies for the late reply, I'm flooded.

I discussed this with Daniel Stone last week in a chat; roughly what we
need to do is:

1. Pick a function from shmem helpers.

2. Go through all drivers that call this, and make sure that we acquire
dma_resv_lock in the top level driver entry point for this.

3. Once all driver code paths are converted, add a dma_resv_assert_held()
call to that function to make sure you have it all correctly.

4. Repeat 1-3 until all shmem helper functions are converted over.

5. Ditch the 3 different shmem helper locks.
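
As an illustration of steps 2-3 (a sketch only; driver_foo_op() is a
made-up driver entry point, not code from this series), the end state
for one helper would look roughly like:

/* 2) top-level driver entry point acquires the reservation lock */
int driver_foo_op(struct drm_gem_shmem_object *shmem)
{
	int ret;

	ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
	if (ret)
		return ret;

	ret = drm_gem_shmem_get_pages(shmem);

	dma_resv_unlock(shmem->base.resv);
	return ret;
}

/* 3) once all callers do that, the helper only asserts that it's held */
int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem)
{
	dma_resv_assert_held(shmem->base.resv);

	return drm_gem_shmem_get_pages_locked(shmem);
}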

The trouble is that I forgot that vmap is a thing, so that needs more
work. I think there's two approaches here:
- Do the vmap at import time. This is the trick we used to untangle the
  dma_resv_lock issues around dma_buf_attachment_map()
- Change the dma_buf_vmap rules that callers must hold the dma_resv_lock.
- Maybe also do what you suggest and keep a separate lock for this, but
  the fundamental issue is that this doesn't really work - if you share
  buffers both ways with two drivers using shmem helpers, then the
  ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
  you can get some nice deadlocks. So not a great approach (and also the
  reason why we really need to get everyone to move towards dma_resv_lock
  as _the_ buffer object lock, since otherwise we'll never get a
  consistent lock nesting hierarchy).

The trouble here is that trying to be clever and doing the conversion just
in shmem helpers won't work, because there's a lot of cases where the
drivers are all kinds of inconsistent with their locking.

Adding Daniel S, also maybe for questions it'd be fastest to chat on irc?
-Daniel
Dmitry Osipenko April 28, 2022, 6:31 p.m. UTC | #4
Hello Daniel,

27.04.2022 17:50, Daniel Vetter wrote:
> On Mon, Apr 18, 2022 at 10:18:54PM +0300, Dmitry Osipenko wrote:
>> Hello,
>>
>> On 4/18/22 21:38, Thomas Zimmermann wrote:
>>> Hi
>>>
>>> Am 18.04.22 um 00:37 schrieb Dmitry Osipenko:
>>>> Replace drm_gem_shmem locks with the reservation lock to make GEM
>>>> lockings more consistent.
>>>>
>>>> Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
>>>> protected by separate locks, now it's the same lock, but it doesn't
>>>> make any difference for the current GEM SHMEM users. Only Panfrost
>>>> and Lima drivers use vmap() and they do it in the slow code paths,
>>>> hence there was no practical justification for the usage of separate
>>>> lock in the vmap().
>>>>
>>>> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
>>>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>>> ---
>> ...
>>>>   @@ -310,7 +306,7 @@ static int drm_gem_shmem_vmap_locked(struct
>>>> drm_gem_shmem_object *shmem,
>>>>       } else {
>>>>           pgprot_t prot = PAGE_KERNEL;
>>>>   -        ret = drm_gem_shmem_get_pages(shmem);
>>>> +        ret = drm_gem_shmem_get_pages_locked(shmem);
>>>>           if (ret)
>>>>               goto err_zero_use;
>>>>   @@ -360,11 +356,11 @@ int drm_gem_shmem_vmap(struct
>>>> drm_gem_shmem_object *shmem,
>>>>   {
>>>>       int ret;
>>>>   -    ret = mutex_lock_interruptible(&shmem->vmap_lock);
>>>> +    ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
>>>>       if (ret)
>>>>           return ret;
>>>>       ret = drm_gem_shmem_vmap_locked(shmem, map);
>>>
>>> Within drm_gem_shmem_vmap_locked(), there's a call to dma_buf_vmap() for
>>> imported pages. If the exporter side also holds/acquires the same
>>> reservation lock as our object, the whole thing can deadlock. We cannot
>>> move dma_buf_vmap() out of the CS, because we still need to increment
>>> the reference counter. I honestly don't know how to easily fix this
>>> problem. There's a TODO item about replacing these locks at [1]. As
>>> Daniel suggested this patch, we should talk to him about the issue.
>>>
>>> Best regards
>>> Thomas
>>>
>>> [1]
>>> https://www.kernel.org/doc/html/latest/gpu/todo.html#move-buffer-object-locking-to-dma-resv-lock
>>
>> Indeed, good catch! Perhaps we could simply use a separate lock for the
>> vmapping of the *imported* GEMs? The vmap_use_count is used only by
>> vmap/vunmap, so it doesn't matter which lock is used by these functions
>> in the case of imported GEMs since we only need to protect the
>> vmap_use_count.
> 
> Apologies for the late reply, I'm flooded.
> 
> I discussed this with Daniel Stone last week in a chat, roughly what we
> need to do is:
> 
> 1. Pick a function from shmem helpers.
> 
> 2. Go through all drivers that call this, and make sure that we acquire
> dma_resv_lock in the top level driver entry point for this.
> 
> 3. Once all driver code paths are converted, add a dma_resv_assert_held()
> call to that function to make sure you have it all correctly.
> 4. Repeate 1-3 until all shmem helper functions are converted over.
Somehow I didn't notice the existence of dma_resv_assert_held(), thank
you for the suggestion :)

> 
> 5. Ditch the 3 different shmem helper locks.
> 
> The trouble is that I forgot that vmap is a thing, so that needs more
> work. I think there's two approaches here:
> - Do the vmap at import time. This is the trick we used to untangle the
>   dma_resv_lock issues around dma_buf_attachment_map()

> - Change the dma_buf_vmap rules that callers must hold the dma_resv_lock.

I'll consider this option for v6, thank you.

I see now that you actually want to define the new rules for
dma-bufs in general and not only in the context of the DRM code; this
now makes much more sense to me.

> - Maybe also do what you suggest and keep a separate lock for this, but
>   the fundamental issue is that this doesn't really work - if you share
>   buffers both ways with two drivers using shmem helpers, then the
>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
>   you can get some nice deadlocks. So not a great approach (and also the
>   reason why we really need to get everyone to move towards dma_resv_lock
>   as _the_ buffer object lock, since otherwise we'll never get a
>   consistent lock nesting hierarchy).

The separate locks should work okay because it will always be the
exporter that takes the dma_resv_lock. But I agree that it's less ideal
than defining the new rules for dma-bufs, since sometimes you will take
the resv lock and sometimes not, potentially hiding locking bugs.

> The trouble here is that trying to be clever and doing the conversion just
> in shmem helpers wont work, because there's a lot of cases where the
> drivers are all kinds of inconsistent with their locking.
> 
> Adding Daniel S, also maybe for questions it'd be fastest to chat on irc?

My nickname is digetx on the #dri-devel channel, feel free to ping me if
needed. Right now your suggestions are clear to me, hence no extra
questions.

Thank you for the review.
Daniel Vetter May 4, 2022, 8:21 a.m. UTC | #5
On Thu, Apr 28, 2022 at 09:31:00PM +0300, Dmitry Osipenko wrote:
> Hello Daniel,
> 
> 27.04.2022 17:50, Daniel Vetter wrote:
> > On Mon, Apr 18, 2022 at 10:18:54PM +0300, Dmitry Osipenko wrote:
> >> Hello,
> >>
> >> On 4/18/22 21:38, Thomas Zimmermann wrote:
> >>> Hi
> >>>
> >>> Am 18.04.22 um 00:37 schrieb Dmitry Osipenko:
> >>>> Replace drm_gem_shmem locks with the reservation lock to make GEM
> >>>> lockings more consistent.
> >>>>
> >>>> Previously drm_gem_shmem_vmap() and drm_gem_shmem_get_pages() were
> >>>> protected by separate locks, now it's the same lock, but it doesn't
> >>>> make any difference for the current GEM SHMEM users. Only Panfrost
> >>>> and Lima drivers use vmap() and they do it in the slow code paths,
> >>>> hence there was no practical justification for the usage of separate
> >>>> lock in the vmap().
> >>>>
> >>>> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> >>>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> >>>> ---
> >> ...
> >>>>   @@ -310,7 +306,7 @@ static int drm_gem_shmem_vmap_locked(struct
> >>>> drm_gem_shmem_object *shmem,
> >>>>       } else {
> >>>>           pgprot_t prot = PAGE_KERNEL;
> >>>>   -        ret = drm_gem_shmem_get_pages(shmem);
> >>>> +        ret = drm_gem_shmem_get_pages_locked(shmem);
> >>>>           if (ret)
> >>>>               goto err_zero_use;
> >>>>   @@ -360,11 +356,11 @@ int drm_gem_shmem_vmap(struct
> >>>> drm_gem_shmem_object *shmem,
> >>>>   {
> >>>>       int ret;
> >>>>   -    ret = mutex_lock_interruptible(&shmem->vmap_lock);
> >>>> +    ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
> >>>>       if (ret)
> >>>>           return ret;
> >>>>       ret = drm_gem_shmem_vmap_locked(shmem, map);
> >>>
> >>> Within drm_gem_shmem_vmap_locked(), there's a call to dma_buf_vmap() for
> >>> imported pages. If the exporter side also holds/acquires the same
> >>> reservation lock as our object, the whole thing can deadlock. We cannot
> >>> move dma_buf_vmap() out of the CS, because we still need to increment
> >>> the reference counter. I honestly don't know how to easily fix this
> >>> problem. There's a TODO item about replacing these locks at [1]. As
> >>> Daniel suggested this patch, we should talk to him about the issue.
> >>>
> >>> Best regards
> >>> Thomas
> >>>
> >>> [1]
> >>> https://www.kernel.org/doc/html/latest/gpu/todo.html#move-buffer-object-locking-to-dma-resv-lock
> >>
> >> Indeed, good catch! Perhaps we could simply use a separate lock for the
> >> vmapping of the *imported* GEMs? The vmap_use_count is used only by
> >> vmap/vunmap, so it doesn't matter which lock is used by these functions
> >> in the case of imported GEMs since we only need to protect the
> >> vmap_use_count.
> > 
> > Apologies for the late reply, I'm flooded.
> > 
> > I discussed this with Daniel Stone last week in a chat, roughly what we
> > need to do is:
> > 
> > 1. Pick a function from shmem helpers.
> > 
> > 2. Go through all drivers that call this, and make sure that we acquire
> > dma_resv_lock in the top level driver entry point for this.
> > 
> > 3. Once all driver code paths are converted, add a dma_resv_assert_held()
> > call to that function to make sure you have it all correctly.
> > 4. Repeate 1-3 until all shmem helper functions are converted over.
> Somehow I didn't notice the existence of dma_resv_assert_held(), thank
> you for the suggestion :)
> 
> > 
> > 5. Ditch the 3 different shmem helper locks.
> > 
> > The trouble is that I forgot that vmap is a thing, so that needs more
> > work. I think there's two approaches here:
> > - Do the vmap at import time. This is the trick we used to untangle the
> >   dma_resv_lock issues around dma_buf_attachment_map()
> 
> > - Change the dma_buf_vmap rules that callers must hold the dma_resv_lock.
> 
> I'll consider this option for v6, thank you.
> 
> I see now that you actually want to define the new rules for the
> dma-bufs in general and not only in the context of the DRM code, this
> now makes much more sense to me.

Yeah dma-buf is a cross-driver interface, so we should try to be
consistent here. We didn't do this in the past; the only reason you
didn't get lockdep splats was that you normally didn't run all possible
combinations of drivers and importer/exporter relationships in one system.
But that means it becomes very tricky to reason about how dma-buf really
works.

> > - Maybe also do what you suggest and keep a separate lock for this, but
> >   the fundamental issue is that this doesn't really work - if you share
> >   buffers both ways with two drivers using shmem helpers, then the
> >   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
> >   you can get some nice deadlocks. So not a great approach (and also the
> >   reason why we really need to get everyone to move towards dma_resv_lock
> >   as _the_ buffer object lock, since otherwise we'll never get a
> >   consistent lock nesting hierarchy).
> 
> The separate locks should work okay because it will be always the
> exporter that takes the dma_resv_lock. But I agree that it's less ideal
> than defining the new rules for dma-bufs since sometime you will take
> the resv lock and sometime not, potentially hiding bugs related to lockings.

That's the issue: some importers need to take the dma_resv_lock for
dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
dynamic memory manager). In practice it'll work as well as what we have
currently, which is similarly inconsistent, except with per-driver locks
instead of shared locks from shmem helpers or dma-buf, so it's less obvious
that things are inconsistent.
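
For illustration, the dynamic-importer pattern mentioned above could look
roughly like this (a sketch; importer_vmap_pinned() is a made-up helper,
not taken from any driver):

static int importer_vmap_pinned(struct dma_buf_attachment *attach,
				struct iosys_map *map)
{
	struct dma_buf *dmabuf = attach->dmabuf;
	int ret;

	dma_resv_lock(dmabuf->resv, NULL);

	/* nail the buffer in place before mapping it */
	ret = dma_buf_pin(attach);
	if (!ret) {
		ret = dma_buf_vmap(dmabuf, map);
		if (ret)
			dma_buf_unpin(attach);
	}

	dma_resv_unlock(dmabuf->resv);

	return ret;
}

So dma_buf_vmap() ends up being called with the reservation lock already
held by such importers, while others call it with no resv lock at all.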

So yeah if it's too messy maybe the approach is to have a separate lock
for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
series.
-Daniel

> > The trouble here is that trying to be clever and doing the conversion just
> > in shmem helpers wont work, because there's a lot of cases where the
> > drivers are all kinds of inconsistent with their locking.
> > 
> > Adding Daniel S, also maybe for questions it'd be fastest to chat on irc?
> 
> My nickname is digetx on the #dri-devel channel, feel free to ping me if
> needed. Right now yours suggestions are clear to me, hence no extra
> questions.
> 
> Thank you for the review.
Dmitry Osipenko May 4, 2022, 3:56 p.m. UTC | #6
On 5/4/22 11:21, Daniel Vetter wrote:
...
>>> - Maybe also do what you suggest and keep a separate lock for this, but
>>>   the fundamental issue is that this doesn't really work - if you share
>>>   buffers both ways with two drivers using shmem helpers, then the
>>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
>>>   you can get some nice deadlocks. So not a great approach (and also the
>>>   reason why we really need to get everyone to move towards dma_resv_lock
>>>   as _the_ buffer object lock, since otherwise we'll never get a
>>>   consistent lock nesting hierarchy).
>>
>> The separate locks should work okay because it will be always the
>> exporter that takes the dma_resv_lock. But I agree that it's less ideal
>> than defining the new rules for dma-bufs since sometime you will take
>> the resv lock and sometime not, potentially hiding bugs related to lockings.
> 
> That's the issue, some importers need to take the dma_resv_lock for
> dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
> dynamic memory manager). In practice it'll work as well as what we have
> currently, which is similarly inconsistent, except with per-driver locks
> instead of shared locks from shmem helpers or dma-buf, so less obvious
> that things are inconsistent.
> 
> So yeah if it's too messy maybe the approach is to have a separate lock
> for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
> series.

The amdgpu driver was the first to introduce the concept of movable
memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
both the amdgpu ttm and shmem drivers we will want to hold the reservation
lock when we're touching movable buffers. The current way of denoting
that a dma-buf is movable is to implement the pin/unpin callbacks of the
dma-buf ops, which should be doable for shmem.

A day ago I found that mapping of imported dma-bufs is broken at least
for the Tegra DRM driver (and likely for others too) because the driver
doesn't expect that anyone will try to mmap an imported buffer and just
doesn't handle this case at all, so we get a hard lockup on touching the
mapped memory, because we're mapping something other than the dma-buf.

My plan is to move the dma-buf management code to the level of the DRM
core and make it aware of the reservation locks for the dynamic dma-bufs.
This way we will get the proper locking for dma-bufs and fix the mapping
of imported dma-bufs for Tegra and other drivers.
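
As a sketch of what "doable for shmem" could mean (illustrative only;
wiring the existing drm_gem_shmem_pin()/drm_gem_shmem_unpin() helpers
into dma_buf_ops like this is an assumption, not existing code):

static int drm_gem_shmem_dmabuf_pin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->dmabuf->priv;

	return drm_gem_shmem_pin(to_drm_gem_shmem_obj(obj));
}

static void drm_gem_shmem_dmabuf_unpin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->dmabuf->priv;

	drm_gem_shmem_unpin(to_drm_gem_shmem_obj(obj));
}

These would then be set as the .pin/.unpin callbacks in the exporter's
struct dma_buf_ops, which is how a dynamic (movable) dma-buf is
advertised today.
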
Daniel Vetter May 5, 2022, 8:12 a.m. UTC | #7
On Wed, May 04, 2022 at 06:56:09PM +0300, Dmitry Osipenko wrote:
> On 5/4/22 11:21, Daniel Vetter wrote:
> ...
> >>> - Maybe also do what you suggest and keep a separate lock for this, but
> >>>   the fundamental issue is that this doesn't really work - if you share
> >>>   buffers both ways with two drivers using shmem helpers, then the
> >>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
> >>>   you can get some nice deadlocks. So not a great approach (and also the
> >>>   reason why we really need to get everyone to move towards dma_resv_lock
> >>>   as _the_ buffer object lock, since otherwise we'll never get a
> >>>   consistent lock nesting hierarchy).
> >>
> >> The separate locks should work okay because it will be always the
> >> exporter that takes the dma_resv_lock. But I agree that it's less ideal
> >> than defining the new rules for dma-bufs since sometime you will take
> >> the resv lock and sometime not, potentially hiding bugs related to lockings.
> > 
> > That's the issue, some importers need to take the dma_resv_lock for
> > dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
> > dynamic memory manager). In practice it'll work as well as what we have
> > currently, which is similarly inconsistent, except with per-driver locks
> > instead of shared locks from shmem helpers or dma-buf, so less obvious
> > that things are inconsistent.
> > 
> > So yeah if it's too messy maybe the approach is to have a separate lock
> > for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
> > series.
> 
> The amdgpu driver was the fist who introduced the concept of movable
> memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
> both amdgpu ttm and shmem drivers we will want to hold the reservation
> lock when we're touching moveable buffers. The current way of denoting
> that dma-buf is movable is to implement the pin/unpin callbacks of the
> dma-buf ops, should be doable for shmem.

Hm that sounds like a bridge too far? I don't think we want to start
adding moveable dma-bufs for shmem, thus far at least no one asked for
that. Goal here is just to streamline the locking a bit and align across
all the different ways of doing buffers in drm.

Or do you mean something else and I'm just completely lost?

> A day ago I found that mapping of imported dma-bufs is broken at least
> for the Tegra DRM driver (and likely for others too) because driver
> doesn't assume that anyone will try to mmap imported buffer and just
> doesn't handle this case at all, so we're getting a hard lockup on
> touching mapped memory because we're mapping something else than the
> dma-buf.

Huh that sounds bad, how does this happen? Pretty much all pieces of
dma-buf (cpu vmap, userspace mmap, heck even dma_buf_attach) are optional
or at least can fail for various reasons. So exporters not providing mmap
support is fine, but importers then dying is not.

> My plan is to move the dma-buf management code to the level of DRM core
> and make it aware of the reservation locks for the dynamic dma-bufs.
> This way we will get the proper locking for dma-bufs and fix mapping of
> imported dma-bufs for Tegra and other drivers.

So maybe we're completely talking past each another, or coffee is not
working here on my end, but I've no idea what you mean.

We do have some helpers for taking care of the dma_resv_lock dance, and
Christian König has an rfc patch set to maybe unify this further. But that
should be fairly orthogonal to reworking shmem (it might help a bit with
reworking shmem though).
-Daniel
Dmitry Osipenko May 5, 2022, 10:49 p.m. UTC | #8
On 5/5/22 11:12, Daniel Vetter wrote:
> On Wed, May 04, 2022 at 06:56:09PM +0300, Dmitry Osipenko wrote:
>> On 5/4/22 11:21, Daniel Vetter wrote:
>> ...
>>>>> - Maybe also do what you suggest and keep a separate lock for this, but
>>>>>   the fundamental issue is that this doesn't really work - if you share
>>>>>   buffers both ways with two drivers using shmem helpers, then the
>>>>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
>>>>>   you can get some nice deadlocks. So not a great approach (and also the
>>>>>   reason why we really need to get everyone to move towards dma_resv_lock
>>>>>   as _the_ buffer object lock, since otherwise we'll never get a
>>>>>   consistent lock nesting hierarchy).
>>>>
>>>> The separate locks should work okay because it will be always the
>>>> exporter that takes the dma_resv_lock. But I agree that it's less ideal
>>>> than defining the new rules for dma-bufs since sometime you will take
>>>> the resv lock and sometime not, potentially hiding bugs related to lockings.
>>>
>>> That's the issue, some importers need to take the dma_resv_lock for
>>> dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
>>> dynamic memory manager). In practice it'll work as well as what we have
>>> currently, which is similarly inconsistent, except with per-driver locks
>>> instead of shared locks from shmem helpers or dma-buf, so less obvious
>>> that things are inconsistent.
>>>
>>> So yeah if it's too messy maybe the approach is to have a separate lock
>>> for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
>>> series.
>>
>> The amdgpu driver was the fist who introduced the concept of movable
>> memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
>> both amdgpu ttm and shmem drivers we will want to hold the reservation
>> lock when we're touching moveable buffers. The current way of denoting
>> that dma-buf is movable is to implement the pin/unpin callbacks of the
>> dma-buf ops, should be doable for shmem.
> 
> Hm that sounds like a bridge too far? I don't think we want to start
> adding moveable dma-bufs for shmem, thus far at least no one asked for
> that. Goal here is just to streamline the locking a bit and align across
> all the different ways of doing buffers in drm.
> 
> Or do you mean something else and I'm just completely lost?

I'm talking about aligning the DRM locks with the dma-buf locks. The problem
is that the dma-buf locking convention isn't specified yet. In particular,
there is no convention for the mapping operations.

If we want to switch vmapping of shmem to use the reservation lock, then
somebody will have to hold this lock for dma_buf_vmap() and the locking
convention needs to be specified firmly.

In the case of dynamic buffers, we will also need to specify whether
dma_buf_vmap() should imply implicit pinning by the exporter or whether
the buffer must be pinned explicitly by the importer before
dma_buf_vmap() is invoked.

Perhaps I indeed shouldn't care about this for this patchset. The
complete locking model of dma-bufs must be specified first.

>> A day ago I found that mapping of imported dma-bufs is broken at least
>> for the Tegra DRM driver (and likely for others too) because driver
>> doesn't assume that anyone will try to mmap imported buffer and just
>> doesn't handle this case at all, so we're getting a hard lockup on
>> touching mapped memory because we're mapping something else than the
>> dma-buf.
> 
> Huh that sounds bad, how does this happen? Pretty much all pieces of
> dma-buf (cpu vmap, userspace mmap, heck even dma_buf_attach) are optional
> or at least can fail for various reasons. So exporters not providing mmap
> support is fine, but importers then dying is not.

Those drivers that die don't have userspace that uses dma-bufs
extensively. I noticed it only because I was looking at this code too
much over the last few days.

Drivers that don't die either map imported BOs properly or don't allow
mapping at all.

>> My plan is to move the dma-buf management code to the level of DRM core
>> and make it aware of the reservation locks for the dynamic dma-bufs.
>> This way we will get the proper locking for dma-bufs and fix mapping of
>> imported dma-bufs for Tegra and other drivers.
> 
> So maybe we're completely talking past each another, or coffee is not
> working here on my end, but I've no idea what you mean.
> 
> We do have some helpers for taking care of the dma_resv_lock dance, and
> Christian König has an rfc patch set to maybe unify this further. But that
> should be fairly orthogonal to reworking shmem (it might help a bit with
> reworking shmem though).

The reservation lock itself doesn't help shmem much, IMO. It should help
only in the context of dynamic dma-bufs, and today we don't have a need
for dynamic shmem dma-bufs.

You were talking about making DRM locks consistent with dma-buf locks,
so I thought that your main point in using reservation locks
for shmem was to prepare for the new locking scheme.

I wanted to try to specify the dma-buf locking convention for the mapping
operations because it's missing right now and it should affect how DRM
takes the reservation locks, but I see now that this is not easy to do.

Could you please point me at Christian's RFC patch? He posted too many
patches and I can't find it :) I'm curious to take a look.
Daniel Vetter May 9, 2022, 1:42 p.m. UTC | #9
On Fri, May 06, 2022 at 01:49:12AM +0300, Dmitry Osipenko wrote:
> On 5/5/22 11:12, Daniel Vetter wrote:
> > On Wed, May 04, 2022 at 06:56:09PM +0300, Dmitry Osipenko wrote:
> >> On 5/4/22 11:21, Daniel Vetter wrote:
> >> ...
> >>>>> - Maybe also do what you suggest and keep a separate lock for this, but
> >>>>>   the fundamental issue is that this doesn't really work - if you share
> >>>>>   buffers both ways with two drivers using shmem helpers, then the
> >>>>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
> >>>>>   you can get some nice deadlocks. So not a great approach (and also the
> >>>>>   reason why we really need to get everyone to move towards dma_resv_lock
> >>>>>   as _the_ buffer object lock, since otherwise we'll never get a
> >>>>>   consistent lock nesting hierarchy).
> >>>>
> >>>> The separate locks should work okay because it will be always the
> >>>> exporter that takes the dma_resv_lock. But I agree that it's less ideal
> >>>> than defining the new rules for dma-bufs since sometime you will take
> >>>> the resv lock and sometime not, potentially hiding bugs related to lockings.
> >>>
> >>> That's the issue, some importers need to take the dma_resv_lock for
> >>> dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
> >>> dynamic memory manager). In practice it'll work as well as what we have
> >>> currently, which is similarly inconsistent, except with per-driver locks
> >>> instead of shared locks from shmem helpers or dma-buf, so less obvious
> >>> that things are inconsistent.
> >>>
> >>> So yeah if it's too messy maybe the approach is to have a separate lock
> >>> for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
> >>> series.
> >>
> >> The amdgpu driver was the fist who introduced the concept of movable
> >> memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
> >> both amdgpu ttm and shmem drivers we will want to hold the reservation
> >> lock when we're touching moveable buffers. The current way of denoting
> >> that dma-buf is movable is to implement the pin/unpin callbacks of the
> >> dma-buf ops, should be doable for shmem.
> > 
> > Hm that sounds like a bridge too far? I don't think we want to start
> > adding moveable dma-bufs for shmem, thus far at least no one asked for
> > that. Goal here is just to streamline the locking a bit and align across
> > all the different ways of doing buffers in drm.
> > 
> > Or do you mean something else and I'm just completely lost?
> 
> I'm talking about aligning DRM locks with the dma-buf locks. The problem
> is that the convention of dma-bufs isn't specified yet. In particular
> there is no convention for the mapping operations.
> 
> If we want to switch vmapping of shmem to use reservation lock, then
> somebody will have to hold this lock for dma_buf_vmap() and the locking
> convention needs to be specified firmly.

Ah yes that makes sense.

> In case of dynamic buffers, we will also need to specify whether
> dma_buf_vmap() should imply the implicit pinning by exporter or the
> buffer must be pinned explicitly by importer before dma_buf_vmap() is
> invoked.
> 
> Perhaps I indeed shouldn't care about this for this patchset. The
> complete locking model of dma-bufs must be specified first.

Hm I thought vmap is meant to pin itself, and not rely on any other
pinning done already. And from a quick look through the long call chain
for amd (which is currently the only driver supporting dynamic dma-buf)
that seems to be the case.

But yeah the locking isn't specified yet, and that makes it a bit of a
mess :-(

> >> A day ago I found that mapping of imported dma-bufs is broken at least
> >> for the Tegra DRM driver (and likely for others too) because driver
> >> doesn't assume that anyone will try to mmap imported buffer and just
> >> doesn't handle this case at all, so we're getting a hard lockup on
> >> touching mapped memory because we're mapping something else than the
> >> dma-buf.
> > 
> > Huh that sounds bad, how does this happen? Pretty much all pieces of
> > dma-buf (cpu vmap, userspace mmap, heck even dma_buf_attach) are optional
> > or at least can fail for various reasons. So exporters not providing mmap
> > support is fine, but importers then dying is not.
> 
> Those drivers that die don't have userspace that uses dma-bufs
> extensively. I noticed it only because was looking at this code too much
> for the last days.
> 
> Drivers that don't die either map imported BOs properly or don't allow
> mapping at all.

Ah yeah driver bugs as explanation makes sense :-/

> >> My plan is to move the dma-buf management code to the level of DRM core
> >> and make it aware of the reservation locks for the dynamic dma-bufs.
> >> This way we will get the proper locking for dma-bufs and fix mapping of
> >> imported dma-bufs for Tegra and other drivers.
> > 
> > So maybe we're completely talking past each another, or coffee is not
> > working here on my end, but I've no idea what you mean.
> > 
> > We do have some helpers for taking care of the dma_resv_lock dance, and
> > Christian König has an rfc patch set to maybe unify this further. But that
> > should be fairly orthogonal to reworking shmem (it might help a bit with
> > reworking shmem though).
> 
> The reservation lock itself doesn't help much shmem, IMO. It should help
> only in the context of dynamic dma-bufs and today we don't have a need
> in the dynamic shmem dma-bufs.
> 
> You were talking about making DRM locks consistent with dma-buf locks,
> so I thought that yours main point of making use of reservation locks
> for shmem is to prepare to the new locking scheme.
> 
> I wanted to try to specify the dma-buf locking convention for mapping
> operations because it's missing right now and it should affect how DRM
> should take the reservation locks, but this is not easy to do as I see now.
> 
> Could you please point at the Christian's RFC patch? He posted too many
> patches, can't find it :) I'm curious to take a look.

https://lore.kernel.org/dri-devel/20220504074739.2231-1-christian.koenig@amd.com/

Wrt this patch series here I'm wondering whether we could do an interim
solution that side-steps the dma_buf_vmap mess.

- in shmem helpers pin any vmapped buffer (it's how dma-buf works too),
  and that pinning would be done under dma_resv_lock (like with other
  drivers using dma_resv_lock for bo protection)

- switch over everything else except vmap code to dma_resv_lock, but leave
  vmap locking as-is

- shrinker then only needs to trylock dma_resv_trylock in the shrinker,
  which can check for pinned buffer and that's good enough to exclude
  vmap'ed buffer. And it avoids mixing the vmap locking into the new
  shrinker code and driver interfaces.
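
A rough sketch of the shrinker side of this interim proposal (the
driver_shrinker_try_purge() name and the pages_pin_count field are
assumptions here; drm_gem_shmem_is_purgeable() and
drm_gem_shmem_purge_locked() are the existing helpers):

static bool driver_shrinker_try_purge(struct drm_gem_shmem_object *shmem)
{
	bool purged = false;

	if (!dma_resv_trylock(shmem->base.resv))
		return false;

	/* vmapped buffers are pinned, so checking the pin count skips them */
	if (!shmem->pages_pin_count && drm_gem_shmem_is_purgeable(shmem)) {
		drm_gem_shmem_purge_locked(shmem);
		purged = true;
	}

	dma_resv_unlock(shmem->base.resv);

	return purged;
}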

This still leaves the vmap locking mess as-is, but I think that's a mess
that's orthogonal to shrinker work.

Thoughts?
-Daniel
Dmitry Osipenko May 10, 2022, 1:39 p.m. UTC | #10
On 5/9/22 16:42, Daniel Vetter wrote:
> On Fri, May 06, 2022 at 01:49:12AM +0300, Dmitry Osipenko wrote:
>> On 5/5/22 11:12, Daniel Vetter wrote:
>>> On Wed, May 04, 2022 at 06:56:09PM +0300, Dmitry Osipenko wrote:
>>>> On 5/4/22 11:21, Daniel Vetter wrote:
>>>> ...
>>>>>>> - Maybe also do what you suggest and keep a separate lock for this, but
>>>>>>>   the fundamental issue is that this doesn't really work - if you share
>>>>>>>   buffers both ways with two drivers using shmem helpers, then the
>>>>>>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
>>>>>>>   you can get some nice deadlocks. So not a great approach (and also the
>>>>>>>   reason why we really need to get everyone to move towards dma_resv_lock
>>>>>>>   as _the_ buffer object lock, since otherwise we'll never get a
>>>>>>>   consistent lock nesting hierarchy).
>>>>>>
>>>>>> The separate locks should work okay because it will be always the
>>>>>> exporter that takes the dma_resv_lock. But I agree that it's less ideal
>>>>>> than defining the new rules for dma-bufs since sometime you will take
>>>>>> the resv lock and sometime not, potentially hiding bugs related to lockings.
>>>>>
>>>>> That's the issue, some importers need to take the dma_resv_lock for
>>>>> dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
>>>>> dynamic memory manager). In practice it'll work as well as what we have
>>>>> currently, which is similarly inconsistent, except with per-driver locks
>>>>> instead of shared locks from shmem helpers or dma-buf, so less obvious
>>>>> that things are inconsistent.
>>>>>
>>>>> So yeah if it's too messy maybe the approach is to have a separate lock
>>>>> for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
>>>>> series.
>>>>
>>>> The amdgpu driver was the fist who introduced the concept of movable
>>>> memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
>>>> both amdgpu ttm and shmem drivers we will want to hold the reservation
>>>> lock when we're touching moveable buffers. The current way of denoting
>>>> that dma-buf is movable is to implement the pin/unpin callbacks of the
>>>> dma-buf ops, should be doable for shmem.
>>>
>>> Hm that sounds like a bridge too far? I don't think we want to start
>>> adding moveable dma-bufs for shmem, thus far at least no one asked for
>>> that. Goal here is just to streamline the locking a bit and align across
>>> all the different ways of doing buffers in drm.
>>>
>>> Or do you mean something else and I'm just completely lost?
>>
>> I'm talking about aligning DRM locks with the dma-buf locks. The problem
>> is that the convention of dma-bufs isn't specified yet. In particular
>> there is no convention for the mapping operations.
>>
>> If we want to switch vmapping of shmem to use reservation lock, then
>> somebody will have to hold this lock for dma_buf_vmap() and the locking
>> convention needs to be specified firmly.
> 
> Ah yes that makes sense.
> 
>> In case of dynamic buffers, we will also need to specify whether
>> dma_buf_vmap() should imply the implicit pinning by exporter or the
>> buffer must be pinned explicitly by importer before dma_buf_vmap() is
>> invoked.
>>
>> Perhaps I indeed shouldn't care about this for this patchset. The
>> complete locking model of dma-bufs must be specified first.
> 
> Hm I thought vmap is meant to pin itself, and not rely on any other
> pinning done already. And from a quick look through the long call chain
> for amd (which is currently the only driver supporting dynamic dma-buf)
> that seems to be the case.

The vmapping behaviour is implementation-defined until it's documented
explicitly, IMO.

> But yeah the locking isn't specificied yet, and that makes it a bit a mess
> :-(
> 
>>>> A day ago I found that mapping of imported dma-bufs is broken at least
>>>> for the Tegra DRM driver (and likely for others too) because driver
>>>> doesn't assume that anyone will try to mmap imported buffer and just
>>>> doesn't handle this case at all, so we're getting a hard lockup on
>>>> touching mapped memory because we're mapping something else than the
>>>> dma-buf.
>>>
>>> Huh that sounds bad, how does this happen? Pretty much all pieces of
>>> dma-buf (cpu vmap, userspace mmap, heck even dma_buf_attach) are optional
>>> or at least can fail for various reasons. So exporters not providing mmap
>>> support is fine, but importers then dying is not.
>>
>> Those drivers that die don't have userspace that uses dma-bufs
>> extensively. I noticed it only because was looking at this code too much
>> for the last days.
>>
>> Drivers that don't die either map imported BOs properly or don't allow
>> mapping at all.
> 
> Ah yeah driver bugs as explanation makes sense :-/
> 
>>>> My plan is to move the dma-buf management code to the level of DRM core
>>>> and make it aware of the reservation locks for the dynamic dma-bufs.
>>>> This way we will get the proper locking for dma-bufs and fix mapping of
>>>> imported dma-bufs for Tegra and other drivers.
>>>
>>> So maybe we're completely talking past each another, or coffee is not
>>> working here on my end, but I've no idea what you mean.
>>>
>>> We do have some helpers for taking care of the dma_resv_lock dance, and
>>> Christian König has an rfc patch set to maybe unify this further. But that
>>> should be fairly orthogonal to reworking shmem (it might help a bit with
>>> reworking shmem though).
>>
>> The reservation lock itself doesn't help much shmem, IMO. It should help
>> only in the context of dynamic dma-bufs and today we don't have a need
>> in the dynamic shmem dma-bufs.
>>
>> You were talking about making DRM locks consistent with dma-buf locks,
>> so I thought that yours main point of making use of reservation locks
>> for shmem is to prepare to the new locking scheme.
>>
>> I wanted to try to specify the dma-buf locking convention for mapping
>> operations because it's missing right now and it should affect how DRM
>> should take the reservation locks, but this is not easy to do as I see now.
>>
>> Could you please point at the Christian's RFC patch? He posted too many
>> patches, can't find it :) I'm curious to take a look.
> 
> https://lore.kernel.org/dri-devel/20220504074739.2231-1-christian.koenig@amd.com/
> 
> Wrt this patch series here I'm wondering whether we could do an interim
> solution that side-steps the dma_buf_vmap mess.
> 
> - in shmem helpers pin any vmapped buffer (it's how dma-buf works too),
>   and that pinning would be done under dma_resv_lock (like with other
>   drivers using dma_resv_lock for bo protection)
> 
> - switch over everything else except vmap code to dma_resv_lock, but leave
>   vmap locking as-is
> 
> - shrinker then only needs to trylock dma_resv_trylock in the shrinker,
>   which can check for pinned buffer and that's good enough to exclude
>   vmap'ed buffer. And it avoids mixing the vmap locking into the new
>   shrinker code and driver interfaces.
> 
> This still leaves the vmap locking mess as-is, but I think that's a mess
> that's orthogonal to shrinker work.
> 
> Thoughts?

Since vmapping implies implicit pinning, we can't use a separate lock in
drm_gem_shmem_vmap(), because we need to protect
drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
pin the pages and requires the dma_resv_lock to be held.

Hence the problem is:

1. If a dma-buf importer holds the dma_resv_lock and invokes
dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() must
not take the dma_resv_lock.

2. Since the dma-buf locking convention isn't specified, we can't assume
that the dma-buf importer holds the dma_resv_lock around dma_buf_vmap().

The possible solutions are:

1. Specify the dma_resv_lock convention for dma-bufs and make all
drivers follow it.

2. Make only DRM drivers hold the dma_resv_lock around dma_buf_vmap().
Other non-DRM drivers will get a lockdep warning.

3. Make drm_gem_shmem_vmap() take the dma_resv_lock and deadlock if a
dma-buf importer holds the lock.

...

There are actually very few drivers in the kernel that use dma_buf_vmap()
[1], so perhaps it's not really a big deal to first try to define the
locking and pinning convention for dma-bufs, at least for
dma_buf_vmap()? Let me try to do this.

[1] https://elixir.bootlin.com/linux/v5.18-rc6/C/ident/dma_buf_vmap

I envision that the extra dma_resv_locks for dma-bufs potentially may
create unnecessary bottlenecks for some drivers if locking isn't really
necessary by a specific driver, so drivers will need to keep this in
mind. On the other hand, I don't think that any of the today's drivers
will notice the additional resv locks in practice.
Daniel Vetter May 11, 2022, 1 p.m. UTC | #11
On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
> On 5/9/22 16:42, Daniel Vetter wrote:
> > On Fri, May 06, 2022 at 01:49:12AM +0300, Dmitry Osipenko wrote:
> >> On 5/5/22 11:12, Daniel Vetter wrote:
> >>> On Wed, May 04, 2022 at 06:56:09PM +0300, Dmitry Osipenko wrote:
> >>>> On 5/4/22 11:21, Daniel Vetter wrote:
> >>>> ...
> >>>>>>> - Maybe also do what you suggest and keep a separate lock for this, but
> >>>>>>>   the fundamental issue is that this doesn't really work - if you share
> >>>>>>>   buffers both ways with two drivers using shmem helpers, then the
> >>>>>>>   ordering of this vmap_count_mutex vs dma_resv_lock is inconsistent and
> >>>>>>>   you can get some nice deadlocks. So not a great approach (and also the
> >>>>>>>   reason why we really need to get everyone to move towards dma_resv_lock
> >>>>>>>   as _the_ buffer object lock, since otherwise we'll never get a
> >>>>>>>   consistent lock nesting hierarchy).
> >>>>>>
> >>>>>> The separate locks should work okay because it will be always the
> >>>>>> exporter that takes the dma_resv_lock. But I agree that it's less ideal
> >>>>>> than defining the new rules for dma-bufs since sometime you will take
> >>>>>> the resv lock and sometime not, potentially hiding bugs related to lockings.
> >>>>>
> >>>>> That's the issue, some importers need to take the dma_resv_lock for
> >>>>> dma_buf_vmap too (e.g. to first nail the buffer in place when it's a
> >>>>> dynamic memory manager). In practice it'll work as well as what we have
> >>>>> currently, which is similarly inconsistent, except with per-driver locks
> >>>>> instead of shared locks from shmem helpers or dma-buf, so less obvious
> >>>>> that things are inconsistent.
> >>>>>
> >>>>> So yeah if it's too messy maybe the approach is to have a separate lock
> >>>>> for vmap for now, land things, and then fix up dma_buf_vmap in a follow up
> >>>>> series.
> >>>>
> >>>> The amdgpu driver was the fist who introduced the concept of movable
> >>>> memory for dma-bufs. Now we want to support it for DRM SHMEM too. For
> >>>> both amdgpu ttm and shmem drivers we will want to hold the reservation
> >>>> lock when we're touching moveable buffers. The current way of denoting
> >>>> that dma-buf is movable is to implement the pin/unpin callbacks of the
> >>>> dma-buf ops, should be doable for shmem.
> >>>
> >>> Hm that sounds like a bridge too far? I don't think we want to start
> >>> adding moveable dma-bufs for shmem, thus far at least no one asked for
> >>> that. Goal here is just to streamline the locking a bit and align across
> >>> all the different ways of doing buffers in drm.
> >>>
> >>> Or do you mean something else and I'm just completely lost?
> >>
> >> I'm talking about aligning DRM locks with the dma-buf locks. The problem
> >> is that the convention of dma-bufs isn't specified yet. In particular
> >> there is no convention for the mapping operations.
> >>
> >> If we want to switch vmapping of shmem to use reservation lock, then
> >> somebody will have to hold this lock for dma_buf_vmap() and the locking
> >> convention needs to be specified firmly.
> > 
> > Ah yes that makes sense.
> > 
> >> In case of dynamic buffers, we will also need to specify whether
> >> dma_buf_vmap() should imply the implicit pinning by exporter or the
> >> buffer must be pinned explicitly by importer before dma_buf_vmap() is
> >> invoked.
> >>
> >> Perhaps I indeed shouldn't care about this for this patchset. The
> >> complete locking model of dma-bufs must be specified first.
> > 
> > Hm I thought vmap is meant to pin itself, and not rely on any other
> > pinning done already. And from a quick look through the long call chain
> > for amd (which is currently the only driver supporting dynamic dma-buf)
> > that seems to be the case.
> 
> The vmapping behaviour is implementation-defined until it's documented
> explicitly, IMO.
> 
> > But yeah the locking isn't specificied yet, and that makes it a bit a mess
> > :-(
> > 
> >>>> A day ago I found that mapping of imported dma-bufs is broken at least
> >>>> for the Tegra DRM driver (and likely for others too) because driver
> >>>> doesn't assume that anyone will try to mmap imported buffer and just
> >>>> doesn't handle this case at all, so we're getting a hard lockup on
> >>>> touching mapped memory because we're mapping something else than the
> >>>> dma-buf.
> >>>
> >>> Huh that sounds bad, how does this happen? Pretty much all pieces of
> >>> dma-buf (cpu vmap, userspace mmap, heck even dma_buf_attach) are optional
> >>> or at least can fail for various reasons. So exporters not providing mmap
> >>> support is fine, but importers then dying is not.
> >>
> >> Those drivers that die don't have userspace that uses dma-bufs
> >> extensively. I noticed it only because was looking at this code too much
> >> for the last days.
> >>
> >> Drivers that don't die either map imported BOs properly or don't allow
> >> mapping at all.
> > 
> > Ah yeah driver bugs as explanation makes sense :-/
> > 
> >>>> My plan is to move the dma-buf management code to the level of DRM core
> >>>> and make it aware of the reservation locks for the dynamic dma-bufs.
> >>>> This way we will get the proper locking for dma-bufs and fix mapping of
> >>>> imported dma-bufs for Tegra and other drivers.
> >>>
> >>> So maybe we're completely talking past each another, or coffee is not
> >>> working here on my end, but I've no idea what you mean.
> >>>
> >>> We do have some helpers for taking care of the dma_resv_lock dance, and
> >>> Christian König has an rfc patch set to maybe unify this further. But that
> >>> should be fairly orthogonal to reworking shmem (it might help a bit with
> >>> reworking shmem though).
> >>
> >> The reservation lock itself doesn't help much shmem, IMO. It should help
> >> only in the context of dynamic dma-bufs and today we don't have a need
> >> in the dynamic shmem dma-bufs.
> >>
> >> You were talking about making DRM locks consistent with dma-buf locks,
> >> so I thought that yours main point of making use of reservation locks
> >> for shmem is to prepare to the new locking scheme.
> >>
> >> I wanted to try to specify the dma-buf locking convention for mapping
> >> operations because it's missing right now and it should affect how DRM
> >> should take the reservation locks, but this is not easy to do as I see now.
> >>
> >> Could you please point at the Christian's RFC patch? He posted too many
> >> patches, can't find it :) I'm curious to take a look.
> > 
> > https://lore.kernel.org/dri-devel/20220504074739.2231-1-christian.koenig@amd.com/
> > 
> > Wrt this patch series here I'm wondering whether we could do an interim
> > solution that side-steps the dma_buf_vmap mess.
> > 
> > - in shmem helpers pin any vmapped buffer (it's how dma-buf works too),
> >   and that pinning would be done under dma_resv_lock (like with other
> >   drivers using dma_resv_lock for bo protection)
> > 
> > - switch over everything else except vmap code to dma_resv_lock, but leave
> >   vmap locking as-is
> > 
> > - shrinker then only needs to trylock dma_resv_trylock in the shrinker,
> >   which can check for pinned buffer and that's good enough to exclude
> >   vmap'ed buffer. And it avoids mixing the vmap locking into the new
> >   shrinker code and driver interfaces.
> > 
> > This still leaves the vmap locking mess as-is, but I think that's a mess
> > that's orthogonal to shrinker work.
> > 
> > Thoughts?
> 
> Since vmapping implies implicit pinning, we can't use a separate lock in
> drm_gem_shmem_vmap() because we need to protect the
> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
> pin the pages and requires the dma_resv_lock to be locked.
> 
> Hence the problem is:
> 
> 1. If dma-buf importer holds the dma_resv_lock and invokes
> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
> not take the dma_resv_lock.
> 
> 2. Since dma-buf locking convention isn't specified, we can't assume
> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
> 
> The possible solutions are:
> 
> 1. Specify the dma_resv_lock convention for dma-bufs and make all
> drivers to follow it.
> 
> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
> Other non-DRM drivers will get the lockdep warning.
> 
> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
> if dma-buf importer holds the lock.
> 
> ...

Yeah this is all very annoying.

> There are actually very few drivers in kernel that use dma_buf_vmap()
> [1], so perhaps it's not really a big deal to first try to define the
> locking and pinning convention for the dma-bufs? At least for
> dma_buf_vmap()? Let me try to do this.
> 
> [1] https://elixir.bootlin.com/linux/v5.18-rc6/C/ident/dma_buf_vmap

Yeah looking through the code there's largely two classes of drivers that
need vmap:

- display drivers that need to do cpu upload (usb, spi, i2c displays).
  Those generally set up the vmap at import time or when creating the
  drm_framebuffer object (e.g. see
  drm_gem_cma_prime_import_sg_table_vmap()), because that's really the
  only place where you can safely do that without running into locking
  inversion issues sooner or later

- lots of other drivers (and shmem helpers) seem to do dma_buf_vmap just
  because they can, but only actually ever use vmap on native objects,
  never on imported objects. Or at least I think so.

So maybe another approach here:

1. In general drivers which need a vmap need to set that up at dma_buf
import time - the same way we pin the buffers at import time for
non-dynamic importers because that's the only place where across all
drivers it's ok to just take dma_resv_lock.

2. We remove the "just because we can" dma_buf_vmap support from
helpers/drivers - the paths all already can cope with NULL since
dma_buf_vmap can fail. vmap will only work on native objects, not imported
ones.

3. If there is any driver using shmem helpers that absolutely needs vmap
to also work on imported buffers, it needs a special import function (like
the cma helpers) which sets up the vmap at import time.

So since this is all very tricky ... what did I miss this time around?
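For point 3, a hedged sketch of what such a special import function could
look like for a shmem-based driver, modelled on the cma helper mentioned
above; the function name is made up, this is not part of this series, and
it glosses over the vmap_use_count bookkeeping:

static struct drm_gem_object *
example_shmem_prime_import_sg_table_vmap(struct drm_device *dev,
                                         struct dma_buf_attachment *attach,
                                         struct sg_table *sgt)
{
        struct drm_gem_shmem_object *shmem;
        struct drm_gem_object *obj;
        struct iosys_map map;
        int ret;

        /* Import time is the one place where the vmap can be set up safely. */
        ret = dma_buf_vmap(attach->dmabuf, &map);
        if (ret)
                return ERR_PTR(ret);

        obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
        if (IS_ERR(obj)) {
                dma_buf_vunmap(attach->dmabuf, &map);
                return obj;
        }

        shmem = to_drm_gem_shmem_obj(obj);
        shmem->vaddr = map.vaddr;       /* keep the mapping for the CPU paths */

        return obj;
}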

> I envision that the extra dma_resv_locks for dma-bufs potentially may
> create unnecessary bottlenecks for some drivers if locking isn't really
> necessary by a specific driver, so drivers will need to keep this in
> mind. On the other hand, I don't think that any of the today's drivers
> will notice the additional resv locks in practice.

Nah I don't think the extra locking will ever create a bottleneck,
especially not for vmap. Generally vmap is a fallback or at least cpu
operation, so at that point you're already going very slow.
-Daniel
Christian König May 11, 2022, 2:24 p.m. UTC | #12
Am 11.05.22 um 15:00 schrieb Daniel Vetter:
> On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
>> [SNIP]
>> Since vmapping implies implicit pinning, we can't use a separate lock in
>> drm_gem_shmem_vmap() because we need to protect the
>> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
>> pin the pages and requires the dma_resv_lock to be locked.
>>
>> Hence the problem is:
>>
>> 1. If dma-buf importer holds the dma_resv_lock and invokes
>> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
>> not take the dma_resv_lock.
>>
>> 2. Since dma-buf locking convention isn't specified, we can't assume
>> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
>>
>> The possible solutions are:
>>
>> 1. Specify the dma_resv_lock convention for dma-bufs and make all
>> drivers to follow it.
>>
>> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
>> Other non-DRM drivers will get the lockdep warning.
>>
>> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
>> if dma-buf importer holds the lock.
>>
>> ...
> Yeah this is all very annoying.

Ah, yes that topic again :)

I think we could relatively easily fix that by just defining and
enforcing that the dma_resv_lock must have been taken by the caller when
dma_buf_vmap() is called.

A two step approach should work:
1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and 
remove all lock taking from the vmap callback implementations.
2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap() 
and enforce that the function is called with the lock held.

It shouldn't be that hard to clean up. The last time I looked into it my
main problem was that we didn't have any easy unit test for it.

Regards,
Christian.

>
>> There are actually very few drivers in kernel that use dma_buf_vmap()
>> [1], so perhaps it's not really a big deal to first try to define the
>> locking and pinning convention for the dma-bufs? At least for
>> dma_buf_vmap()? Let me try to do this.
>>
>> [1] https://elixir.bootlin.com/linux/v5.18-rc6/C/ident/dma_buf_vmap
> Yeah looking through the code there's largely two classes of drivers that
> need vmap:
>
> - display drivers that need to do cpu upload (usb, spi, i2c displays).
>    Those generally set up the vmap at import time or when creating the
>    drm_framebuffer object (e.g. see
>    drm_gem_cma_prime_import_sg_table_vmap()), because that's really the
>    only place where you can safely do that without running into locking
>    inversion issues sooner or later
>
> - lots of other drivers (and shmem helpers) seem to do dma_buf_vmap just
>    because they can, but only actually ever use vmap on native objects,
>    never on imported objects. Or at least I think so.
>
> So maybe another approach here:
>
> 1. In general drivers which need a vmap need to set that up at dma_buf
> import time - the same way we pin the buffers at import time for
> non-dynamic importers because that's the only place where across all
> drivers it's ok to just take dma_resv_lock.
>
> 2. We remove the "just because we can" dma_buf_vmap support from
> helpers/drivers - the paths all already can cope with NULL since
> dma_buf_vmap can fail. vmap will only work on native objects, not imported
> ones.
>
> 3. If there is any driver using shmem helpers that absolutely needs vmap
> to also work on imported it needs a special import function (like cma
> helpers) which sets up the vmap at import time.
>
> So since this is all very tricky ... what did I miss this time around?
>
>> I envision that the extra dma_resv_locks for dma-bufs potentially may
>> create unnecessary bottlenecks for some drivers if locking isn't really
>> necessary by a specific driver, so drivers will need to keep this in
>> mind. On the other hand, I don't think that any of the today's drivers
>> will notice the additional resv locks in practice.
> Nah I don't think the extra locking will ever create a bottleneck,
> especially not for vmap. Generally vmap is a fallback or at least cpu
> operation, so at that point you're already going very slow.
> -Daniel
Daniel Vetter May 11, 2022, 3:07 p.m. UTC | #13
On Wed, May 11, 2022 at 04:24:28PM +0200, Christian König wrote:
> Am 11.05.22 um 15:00 schrieb Daniel Vetter:
> > On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
> > > [SNIP]
> > > Since vmapping implies implicit pinning, we can't use a separate lock in
> > > drm_gem_shmem_vmap() because we need to protect the
> > > drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
> > > pin the pages and requires the dma_resv_lock to be locked.
> > > 
> > > Hence the problem is:
> > > 
> > > 1. If dma-buf importer holds the dma_resv_lock and invokes
> > > dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
> > > not take the dma_resv_lock.
> > > 
> > > 2. Since dma-buf locking convention isn't specified, we can't assume
> > > that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
> > > 
> > > The possible solutions are:
> > > 
> > > 1. Specify the dma_resv_lock convention for dma-bufs and make all
> > > drivers to follow it.
> > > 
> > > 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
> > > Other non-DRM drivers will get the lockdep warning.
> > > 
> > > 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
> > > if dma-buf importer holds the lock.
> > > 
> > > ...
> > Yeah this is all very annoying.
> 
> Ah, yes that topic again :)
> 
> I think we could relatively easily fix that by just defining and enforcing
> that the dma_resv_lock must have be taken by the caller when dma_buf_vmap()
> is called.
> 
> A two step approach should work:
> 1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and
> remove all lock taking from the vmap callback implementations.
> 2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap() and
> enforce that the function is called with the lock held.
> 
> It shouldn't be that hard to clean up. The last time I looked into it my
> main problem was that we didn't had any easy unit test for it.

Yeah I think it's doable or at least a lot less work than the map/unmap
side, which really was unfixable without just pinning at import time to
avoid the locking fun. But vmap is used a lot less, and mostly by display
drivers (where locking is a lot easier against dma_resv_lock), so it might
be possible to pull off.
-Daniel

> 
> Regards,
> Christian.
> 
> > 
> > > There are actually very few drivers in kernel that use dma_buf_vmap()
> > > [1], so perhaps it's not really a big deal to first try to define the
> > > locking and pinning convention for the dma-bufs? At least for
> > > dma_buf_vmap()? Let me try to do this.
> > > 
> > > [1] https://elixir.bootlin.com/linux/v5.18-rc6/C/ident/dma_buf_vmap
> > Yeah looking through the code there's largely two classes of drivers that
> > need vmap:
> > 
> > - display drivers that need to do cpu upload (usb, spi, i2c displays).
> >    Those generally set up the vmap at import time or when creating the
> >    drm_framebuffer object (e.g. see
> >    drm_gem_cma_prime_import_sg_table_vmap()), because that's really the
> >    only place where you can safely do that without running into locking
> >    inversion issues sooner or later
> > 
> > - lots of other drivers (and shmem helpers) seem to do dma_buf_vmap just
> >    because they can, but only actually ever use vmap on native objects,
> >    never on imported objects. Or at least I think so.
> > 
> > So maybe another approach here:
> > 
> > 1. In general drivers which need a vmap need to set that up at dma_buf
> > import time - the same way we pin the buffers at import time for
> > non-dynamic importers because that's the only place where across all
> > drivers it's ok to just take dma_resv_lock.
> > 
> > 2. We remove the "just because we can" dma_buf_vmap support from
> > helpers/drivers - the paths all already can cope with NULL since
> > dma_buf_vmap can fail. vmap will only work on native objects, not imported
> > ones.
> > 
> > 3. If there is any driver using shmem helpers that absolutely needs vmap
> > to also work on imported it needs a special import function (like cma
> > helpers) which sets up the vmap at import time.
> > 
> > So since this is all very tricky ... what did I miss this time around?
> > 
> > > I envision that the extra dma_resv_locks for dma-bufs potentially may
> > > create unnecessary bottlenecks for some drivers if locking isn't really
> > > necessary by a specific driver, so drivers will need to keep this in
> > > mind. On the other hand, I don't think that any of the today's drivers
> > > will notice the additional resv locks in practice.
> > Nah I don't think the extra locking will ever create a bottleneck,
> > especially not for vmap. Generally vmap is a fallback or at least cpu
> > operation, so at that point you're already going very slow.
> > -Daniel
>
Dmitry Osipenko May 11, 2022, 3:14 p.m. UTC | #14
On 5/11/22 17:24, Christian König wrote:
> Am 11.05.22 um 15:00 schrieb Daniel Vetter:
>> On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
>>> [SNIP]
>>> Since vmapping implies implicit pinning, we can't use a separate lock in
>>> drm_gem_shmem_vmap() because we need to protect the
>>> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
>>> pin the pages and requires the dma_resv_lock to be locked.
>>>
>>> Hence the problem is:
>>>
>>> 1. If dma-buf importer holds the dma_resv_lock and invokes
>>> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
>>> not take the dma_resv_lock.
>>>
>>> 2. Since dma-buf locking convention isn't specified, we can't assume
>>> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
>>>
>>> The possible solutions are:
>>>
>>> 1. Specify the dma_resv_lock convention for dma-bufs and make all
>>> drivers to follow it.
>>>
>>> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
>>> Other non-DRM drivers will get the lockdep warning.
>>>
>>> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
>>> if dma-buf importer holds the lock.
>>>
>>> ...
>> Yeah this is all very annoying.
> 
> Ah, yes that topic again :)
> 
> I think we could relatively easily fix that by just defining and
> enforcing that the dma_resv_lock must have be taken by the caller when
> dma_buf_vmap() is called.
> 
> A two step approach should work:
> 1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and
> remove all lock taking from the vmap callback implementations.
> 2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap()
> and enforce that the function is called with the lock held.

I have doubts about the need to move the dma_resv_lock() out into the
callers of dma_buf_vmap().

I looked through all the dma_buf_vmap() users and none of them
interacts with dma_resv_lock() at all, i.e. nobody takes the lock
in/outside of dma_buf_vmap(). Hence it's easier and more practical to make
dma_buf_mmap/vmap() take the dma_resv_lock by themselves.

It's unclear to me which driver may ever want to do the mapping under
the dma_resv_lock. But if we ever have such a driver that needs to map
an imported buffer under dma_resv_lock, then we could always add a
dma_buf_vmap_locked() variant of the function. In this case the locking
rule would sound like this:

"All dma-buf importers are responsible for holding the dma-reservation
lock around the dmabuf->ops->mmap/vmap() calls."
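If such a driver ever shows up, a hedged sketch of the hypothetical
_locked variant (it does not exist in the tree) could be as simple as:

int dma_buf_vmap_locked(struct dma_buf *dmabuf, struct iosys_map *map)
{
        /* Caller is responsible for holding the reservation lock. */
        dma_resv_assert_held(dmabuf->resv);

        if (!dmabuf->ops->vmap)
                return -EINVAL;

        return dmabuf->ops->vmap(dmabuf, map);
}

while the plain dma_buf_vmap() keeps taking and dropping the lock around
it for everyone else.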

> It shouldn't be that hard to clean up. The last time I looked into it my
> main problem was that we didn't had any easy unit test for it.

Do we have any tests for dma-bufs at all? It's unclear to me what you
are going to test with regard to the reservation locks; could you please
clarify?
Daniel Vetter May 11, 2022, 3:29 p.m. UTC | #15
On Wed, May 11, 2022 at 06:14:00PM +0300, Dmitry Osipenko wrote:
> On 5/11/22 17:24, Christian König wrote:
> > Am 11.05.22 um 15:00 schrieb Daniel Vetter:
> >> On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
> >>> [SNIP]
> >>> Since vmapping implies implicit pinning, we can't use a separate lock in
> >>> drm_gem_shmem_vmap() because we need to protect the
> >>> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
> >>> pin the pages and requires the dma_resv_lock to be locked.
> >>>
> >>> Hence the problem is:
> >>>
> >>> 1. If dma-buf importer holds the dma_resv_lock and invokes
> >>> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
> >>> not take the dma_resv_lock.
> >>>
> >>> 2. Since dma-buf locking convention isn't specified, we can't assume
> >>> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
> >>>
> >>> The possible solutions are:
> >>>
> >>> 1. Specify the dma_resv_lock convention for dma-bufs and make all
> >>> drivers to follow it.
> >>>
> >>> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
> >>> Other non-DRM drivers will get the lockdep warning.
> >>>
> >>> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
> >>> if dma-buf importer holds the lock.
> >>>
> >>> ...
> >> Yeah this is all very annoying.
> > 
> > Ah, yes that topic again :)
> > 
> > I think we could relatively easily fix that by just defining and
> > enforcing that the dma_resv_lock must have be taken by the caller when
> > dma_buf_vmap() is called.
> > 
> > A two step approach should work:
> > 1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and
> > remove all lock taking from the vmap callback implementations.
> > 2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap()
> > and enforce that the function is called with the lock held.
> 
> I've doubts about the need to move out the dma_resv_lock() into the
> callers of dma_buf_vmap()..
> 
> I looked through all the dma_buf_vmap() users and neither of them
> interacts with dma_resv_lock() at all, i.e. nobody takes the lock
> in/outside of dma_buf_vmap(). Hence it's easy and more practical to make
> dma_buf_mmap/vmap() to take the dma_resv_lock by themselves.

i915_gem_dmabuf_vmap -> i915_gem_object_pin_map_unlocked ->
  i915_gem_object_lock -> dma_resv_lock

And all the ttm drivers should work similarly. So there's definitely
drivers which grab dma_resv_lock from their vmap callback.

> It's unclear to me which driver may ever want to do the mapping under
> the dma_resv_lock. But if we will ever have such a driver that will need
> to map imported buffer under dma_resv_lock, then we could always add the
> dma_buf_vmap_locked() variant of the function. In this case the locking
> rule will sound like this:
> 
> "All dma-buf importers are responsible for holding the dma-reservation
> lock around the dmabuf->ops->mmap/vmap() calls."
> 
> > It shouldn't be that hard to clean up. The last time I looked into it my
> > main problem was that we didn't had any easy unit test for it.
> 
> Do we have any tests for dma-bufs at all? It's unclear to me what you
> are going to test in regards to the reservation locks, could you please
> clarify?

Unfortunately not really :-/ Only way really is to grab a driver which
needs vmap (those are mostly display drivers) on an imported buffer, and
see what happens.

2nd best is liberally sprinkling lockdep annotations all over the place
and throwing it at intel ci (not sure amd ci is accessible to the public)
and then hoping that's good enough. Stuff like might_lock and
dma_resv_assert_held.
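
As a hedged illustration of that kind of annotation (not something in
this series), an exporter's ->vmap() callback can assert the expected
convention so lockdep complains even when the path is rarely exercised:

static int example_vmap_cb(struct dma_buf *dmabuf, struct iosys_map *map)
{
        /* GEM exporters store the GEM object in dmabuf->priv. */
        struct drm_gem_object *obj = dmabuf->priv;

        /* Caller (importer or dma_buf_vmap()) must hold the resv lock. */
        dma_resv_assert_held(obj->resv);

        /* ... set up the actual mapping here; stubbed for the example ... */
        return 0;
}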
-Daniel
Dmitry Osipenko May 11, 2022, 3:40 p.m. UTC | #16
On 5/11/22 18:29, Daniel Vetter wrote:
> On Wed, May 11, 2022 at 06:14:00PM +0300, Dmitry Osipenko wrote:
>> On 5/11/22 17:24, Christian König wrote:
>>> Am 11.05.22 um 15:00 schrieb Daniel Vetter:
>>>> On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
>>>>> [SNIP]
>>>>> Since vmapping implies implicit pinning, we can't use a separate lock in
>>>>> drm_gem_shmem_vmap() because we need to protect the
>>>>> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
>>>>> pin the pages and requires the dma_resv_lock to be locked.
>>>>>
>>>>> Hence the problem is:
>>>>>
>>>>> 1. If dma-buf importer holds the dma_resv_lock and invokes
>>>>> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
>>>>> not take the dma_resv_lock.
>>>>>
>>>>> 2. Since dma-buf locking convention isn't specified, we can't assume
>>>>> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
>>>>>
>>>>> The possible solutions are:
>>>>>
>>>>> 1. Specify the dma_resv_lock convention for dma-bufs and make all
>>>>> drivers to follow it.
>>>>>
>>>>> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
>>>>> Other non-DRM drivers will get the lockdep warning.
>>>>>
>>>>> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
>>>>> if dma-buf importer holds the lock.
>>>>>
>>>>> ...
>>>> Yeah this is all very annoying.
>>> Ah, yes that topic again :)
>>>
>>> I think we could relatively easily fix that by just defining and
>>> enforcing that the dma_resv_lock must have be taken by the caller when
>>> dma_buf_vmap() is called.
>>>
>>> A two step approach should work:
>>> 1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and
>>> remove all lock taking from the vmap callback implementations.
>>> 2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap()
>>> and enforce that the function is called with the lock held.
>> I've doubts about the need to move out the dma_resv_lock() into the
>> callers of dma_buf_vmap()..
>>
>> I looked through all the dma_buf_vmap() users and neither of them
>> interacts with dma_resv_lock() at all, i.e. nobody takes the lock
>> in/outside of dma_buf_vmap(). Hence it's easy and more practical to make
>> dma_buf_mmap/vmap() to take the dma_resv_lock by themselves.
> i915_gem_dmabuf_vmap -> i915_gem_object_pin_map_unlocked ->
>   i915_gem_object_lock -> dma_resv_lock
> 
> And all the ttm drivers should work similarly. So there's definitely
> drivers which grab dma_resv_lock from their vmap callback.

Grr.. I'll take another look.

>> It's unclear to me which driver may ever want to do the mapping under
>> the dma_resv_lock. But if we will ever have such a driver that will need
>> to map imported buffer under dma_resv_lock, then we could always add the
>> dma_buf_vmap_locked() variant of the function. In this case the locking
>> rule will sound like this:
>>
>> "All dma-buf importers are responsible for holding the dma-reservation
>> lock around the dmabuf->ops->mmap/vmap() calls."

Are you okay with this rule?

>>> It shouldn't be that hard to clean up. The last time I looked into it my
>>> main problem was that we didn't had any easy unit test for it.
>> Do we have any tests for dma-bufs at all? It's unclear to me what you
>> are going to test in regards to the reservation locks, could you please
>> clarify?
> Unfortunately not really :-/ Only way really is to grab a driver which
> needs vmap (those are mostly display drivers) on an imported buffer, and
> see what happens.
> 
> 2nd best is liberally sprinkling lockdep annotations all over the place
> and throwing it at intel ci (not sure amd ci is accessible to the public)
> and then hoping that's good enough. Stuff like might_lock and
> dma_resv_assert_held.

Alright
Daniel Vetter May 11, 2022, 7:05 p.m. UTC | #17
On Wed, May 11, 2022 at 06:40:32PM +0300, Dmitry Osipenko wrote:
> On 5/11/22 18:29, Daniel Vetter wrote:
> > On Wed, May 11, 2022 at 06:14:00PM +0300, Dmitry Osipenko wrote:
> >> On 5/11/22 17:24, Christian König wrote:
> >>> Am 11.05.22 um 15:00 schrieb Daniel Vetter:
> >>>> On Tue, May 10, 2022 at 04:39:53PM +0300, Dmitry Osipenko wrote:
> >>>>> [SNIP]
> >>>>> Since vmapping implies implicit pinning, we can't use a separate lock in
> >>>>> drm_gem_shmem_vmap() because we need to protect the
> >>>>> drm_gem_shmem_get_pages(), which is invoked by drm_gem_shmem_vmap() to
> >>>>> pin the pages and requires the dma_resv_lock to be locked.
> >>>>>
> >>>>> Hence the problem is:
> >>>>>
> >>>>> 1. If dma-buf importer holds the dma_resv_lock and invokes
> >>>>> dma_buf_vmap() -> drm_gem_shmem_vmap(), then drm_gem_shmem_vmap() shall
> >>>>> not take the dma_resv_lock.
> >>>>>
> >>>>> 2. Since dma-buf locking convention isn't specified, we can't assume
> >>>>> that dma-buf importer holds the dma_resv_lock around dma_buf_vmap().
> >>>>>
> >>>>> The possible solutions are:
> >>>>>
> >>>>> 1. Specify the dma_resv_lock convention for dma-bufs and make all
> >>>>> drivers to follow it.
> >>>>>
> >>>>> 2. Make only DRM drivers to hold dma_resv_lock around dma_buf_vmap().
> >>>>> Other non-DRM drivers will get the lockdep warning.
> >>>>>
> >>>>> 3. Make drm_gem_shmem_vmap() to take the dma_resv_lock and get deadlock
> >>>>> if dma-buf importer holds the lock.
> >>>>>
> >>>>> ...
> >>>> Yeah this is all very annoying.
> >>> Ah, yes that topic again :)
> >>>
> >>> I think we could relatively easily fix that by just defining and
> >>> enforcing that the dma_resv_lock must have be taken by the caller when
> >>> dma_buf_vmap() is called.
> >>>
> >>> A two step approach should work:
> >>> 1. Move the call to dma_resv_lock() into the dma_buf_vmap() function and
> >>> remove all lock taking from the vmap callback implementations.
> >>> 2. Move the call to dma_resv_lock() into the callers of dma_buf_vmap()
> >>> and enforce that the function is called with the lock held.
> >> I've doubts about the need to move out the dma_resv_lock() into the
> >> callers of dma_buf_vmap()..
> >>
> >> I looked through all the dma_buf_vmap() users and neither of them
> >> interacts with dma_resv_lock() at all, i.e. nobody takes the lock
> >> in/outside of dma_buf_vmap(). Hence it's easy and more practical to make
> >> dma_buf_mmap/vmap() to take the dma_resv_lock by themselves.
> > i915_gem_dmabuf_vmap -> i915_gem_object_pin_map_unlocked ->
> >   i915_gem_object_lock -> dma_resv_lock
> > 
> > And all the ttm drivers should work similarly. So there's definitely
> > drivers which grab dma_resv_lock from their vmap callback.
> 
> Grr.. I'll take another look.
> 
> >> It's unclear to me which driver may ever want to do the mapping under
> >> the dma_resv_lock. But if we will ever have such a driver that will need
> >> to map imported buffer under dma_resv_lock, then we could always add the
> >> dma_buf_vmap_locked() variant of the function. In this case the locking
> >> rule will sound like this:
> >>
> >> "All dma-buf importers are responsible for holding the dma-reservation
> >> lock around the dmabuf->ops->mmap/vmap() calls."
> 
> Are you okay with this rule?

Yeah I think long-term it's where we want to be, just trying to find
clever ways to get there.

And I think Christian agrees with that?

> >>> It shouldn't be that hard to clean up. The last time I looked into it my
> >>> main problem was that we didn't had any easy unit test for it.
> >> Do we have any tests for dma-bufs at all? It's unclear to me what you
> >> are going to test in regards to the reservation locks, could you please
> >> clarify?
> > Unfortunately not really :-/ Only way really is to grab a driver which
> > needs vmap (those are mostly display drivers) on an imported buffer, and
> > see what happens.
> > 
> > 2nd best is liberally sprinkling lockdep annotations all over the place
> > and throwing it at intel ci (not sure amd ci is accessible to the public)
> > and then hoping that's good enough. Stuff like might_lock and
> > dma_resv_assert_held.
> 
> Alright

So throwing it at intel-gfx-ci can't hurt I think, but that only covers
i915 so doesn't really help with the bigger issue of catching all the
drivers.

Cheers, Daniel
Christian König May 12, 2022, 7:29 a.m. UTC | #18
Am 11.05.22 um 21:05 schrieb Daniel Vetter:
> [SNIP]
>>>> It's unclear to me which driver may ever want to do the mapping under
>>>> the dma_resv_lock. But if we will ever have such a driver that will need
>>>> to map imported buffer under dma_resv_lock, then we could always add the
>>>> dma_buf_vmap_locked() variant of the function. In this case the locking
>>>> rule will sound like this:
>>>>
>>>> "All dma-buf importers are responsible for holding the dma-reservation
>>>> lock around the dmabuf->ops->mmap/vmap() calls."
>> Are you okay with this rule?
> Yeah I think long-term it's where we want to be, just trying to find
> clever ways to get there.
>
> And I think Christian agrees with that?

Yes, completely.

A design where most DMA-buf functions are supposed to be called with the 
reservation lock held is exactly what I have in mind for the long term.
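
Sketched from the importer side, and purely as an illustration of that
long-term convention (dma_buf_vmap_locked() remains hypothetical), a
dynamic importer would then do something like:

static int importer_pin_and_vmap(struct dma_buf_attachment *attach,
                                 struct iosys_map *map)
{
        struct dma_buf *dmabuf = attach->dmabuf;
        int ret;

        ret = dma_resv_lock_interruptible(dmabuf->resv, NULL);
        if (ret)
                return ret;

        /* Pin first so the backing storage can't move under the vmap. */
        ret = dma_buf_pin(attach);
        if (!ret) {
                ret = dma_buf_vmap_locked(dmabuf, map);
                if (ret)
                        dma_buf_unpin(attach);
        }

        dma_resv_unlock(dmabuf->resv);
        return ret;
}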

>>>>> It shouldn't be that hard to clean up. The last time I looked into it my
>>>>> main problem was that we didn't had any easy unit test for it.
>>>> Do we have any tests for dma-bufs at all? It's unclear to me what you
>>>> are going to test in regards to the reservation locks, could you please
>>>> clarify?
>>> Unfortunately not really :-/ Only way really is to grab a driver which
>>> needs vmap (those are mostly display drivers) on an imported buffer, and
>>> see what happens.
>>>
>>> 2nd best is liberally sprinkling lockdep annotations all over the place
>>> and throwing it at intel ci (not sure amd ci is accessible to the public)
>>> and then hoping that's good enough. Stuff like might_lock and
>>> dma_resv_assert_held.
>> Alright
> So throwing it at intel-gfx-ci can't hurt I think, but that only covers
> i915 so doesn't really help with the bigger issue of catching all the
> drivers.

BTW: We now have somebody working on converting the existing
libdrm_amdgpu unit tests over to igt.

Regards,
Christian.

>
> Cheers, Daniel
Daniel Vetter May 12, 2022, 2:15 p.m. UTC | #19
On Thu, May 12, 2022 at 09:29:35AM +0200, Christian König wrote:
> Am 11.05.22 um 21:05 schrieb Daniel Vetter:
> > [SNIP]
> > > > > It's unclear to me which driver may ever want to do the mapping under
> > > > > the dma_resv_lock. But if we will ever have such a driver that will need
> > > > > to map imported buffer under dma_resv_lock, then we could always add the
> > > > > dma_buf_vmap_locked() variant of the function. In this case the locking
> > > > > rule will sound like this:
> > > > > 
> > > > > "All dma-buf importers are responsible for holding the dma-reservation
> > > > > lock around the dmabuf->ops->mmap/vmap() calls."
> > > Are you okay with this rule?
> > Yeah I think long-term it's where we want to be, just trying to find
> > clever ways to get there.
> > 
> > And I think Christian agrees with that?
> 
> Yes, completely.
> 
> A design where most DMA-buf functions are supposed to be called with the
> reservation lock held is exactly what I have in mind for the long term.
> 
> > > > > > It shouldn't be that hard to clean up. The last time I looked into it my
> > > > > > main problem was that we didn't had any easy unit test for it.
> > > > > Do we have any tests for dma-bufs at all? It's unclear to me what you
> > > > > are going to test in regards to the reservation locks, could you please
> > > > > clarify?
> > > > Unfortunately not really :-/ Only way really is to grab a driver which
> > > > needs vmap (those are mostly display drivers) on an imported buffer, and
> > > > see what happens.
> > > > 
> > > > 2nd best is liberally sprinkling lockdep annotations all over the place
> > > > and throwing it at intel ci (not sure amd ci is accessible to the public)
> > > > and then hoping that's good enough. Stuff like might_lock and
> > > > dma_resv_assert_held.
> > > Alright
> > So throwing it at intel-gfx-ci can't hurt I think, but that only covers
> > i915 so doesn't really help with the bigger issue of catching all the
> > drivers.
> 
> BTW: We have now somebody working on converting the existing libdrm_amdgpu
> unit tests over to igt.

This sounds awesome.

/me throws a happy dance

Cheers, Daniel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 30ee46348a99..3ecef571eff3 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -86,8 +86,6 @@  __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
 	if (ret)
 		goto err_release;
 
-	mutex_init(&shmem->pages_lock);
-	mutex_init(&shmem->vmap_lock);
 	INIT_LIST_HEAD(&shmem->madv_list);
 
 	if (!private) {
@@ -157,8 +155,6 @@  void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
 	WARN_ON(shmem->pages_use_count);
 
 	drm_gem_object_release(obj);
-	mutex_destroy(&shmem->pages_lock);
-	mutex_destroy(&shmem->vmap_lock);
 	kfree(shmem);
 }
 EXPORT_SYMBOL_GPL(drm_gem_shmem_free);
@@ -209,11 +205,11 @@  int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem)
 
 	WARN_ON(shmem->base.import_attach);
 
-	ret = mutex_lock_interruptible(&shmem->pages_lock);
+	ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
 	if (ret)
 		return ret;
 	ret = drm_gem_shmem_get_pages_locked(shmem);
-	mutex_unlock(&shmem->pages_lock);
+	dma_resv_unlock(shmem->base.resv);
 
 	return ret;
 }
@@ -248,9 +244,9 @@  static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
  */
 void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem)
 {
-	mutex_lock(&shmem->pages_lock);
+	dma_resv_lock(shmem->base.resv, NULL);
 	drm_gem_shmem_put_pages_locked(shmem);
-	mutex_unlock(&shmem->pages_lock);
+	dma_resv_unlock(shmem->base.resv);
 }
 EXPORT_SYMBOL(drm_gem_shmem_put_pages);
 
@@ -310,7 +306,7 @@  static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
 	} else {
 		pgprot_t prot = PAGE_KERNEL;
 
-		ret = drm_gem_shmem_get_pages(shmem);
+		ret = drm_gem_shmem_get_pages_locked(shmem);
 		if (ret)
 			goto err_zero_use;
 
@@ -360,11 +356,11 @@  int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem,
 {
 	int ret;
 
-	ret = mutex_lock_interruptible(&shmem->vmap_lock);
+	ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
 	if (ret)
 		return ret;
 	ret = drm_gem_shmem_vmap_locked(shmem, map);
-	mutex_unlock(&shmem->vmap_lock);
+	dma_resv_unlock(shmem->base.resv);
 
 	return ret;
 }
@@ -385,7 +381,7 @@  static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
 		dma_buf_vunmap(obj->import_attach->dmabuf, map);
 	} else {
 		vunmap(shmem->vaddr);
-		drm_gem_shmem_put_pages(shmem);
+		drm_gem_shmem_put_pages_locked(shmem);
 	}
 
 	shmem->vaddr = NULL;
@@ -406,9 +402,11 @@  static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
 void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem,
 			  struct iosys_map *map)
 {
-	mutex_lock(&shmem->vmap_lock);
+	dma_resv_lock(shmem->base.resv, NULL);
 	drm_gem_shmem_vunmap_locked(shmem, map);
-	mutex_unlock(&shmem->vmap_lock);
+	dma_resv_unlock(shmem->base.resv);
+
+	drm_gem_shmem_update_purgeable_status(shmem);
 }
 EXPORT_SYMBOL(drm_gem_shmem_vunmap);
 
@@ -442,14 +440,14 @@  drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
  */
 int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv)
 {
-	mutex_lock(&shmem->pages_lock);
+	dma_resv_lock(shmem->base.resv, NULL);
 
 	if (shmem->madv >= 0)
 		shmem->madv = madv;
 
 	madv = shmem->madv;
 
-	mutex_unlock(&shmem->pages_lock);
+	dma_resv_unlock(shmem->base.resv);
 
 	return (madv >= 0);
 }
@@ -487,10 +485,10 @@  EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
 
 bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem)
 {
-	if (!mutex_trylock(&shmem->pages_lock))
+	if (!dma_resv_trylock(shmem->base.resv))
 		return false;
 	drm_gem_shmem_purge_locked(shmem);
-	mutex_unlock(&shmem->pages_lock);
+	dma_resv_unlock(shmem->base.resv);
 
 	return true;
 }
@@ -549,7 +547,7 @@  static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
-	mutex_lock(&shmem->pages_lock);
+	dma_resv_lock(shmem->base.resv, NULL);
 
 	if (page_offset >= num_pages ||
 	    WARN_ON_ONCE(!shmem->pages) ||
@@ -561,7 +559,7 @@  static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
 	}
 
-	mutex_unlock(&shmem->pages_lock);
+	dma_resv_unlock(shmem->base.resv);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
index 0f1ca0b0db49..5008f0c2428f 100644
--- a/drivers/gpu/drm/lima/lima_gem.c
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -34,7 +34,7 @@  int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
 
 	new_size = min(new_size, bo->base.base.size);
 
-	mutex_lock(&bo->base.pages_lock);
+	dma_resv_lock(bo->base.base.resv, NULL);
 
 	if (bo->base.pages) {
 		pages = bo->base.pages;
@@ -42,7 +42,7 @@  int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
 		pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
 				       sizeof(*pages), GFP_KERNEL | __GFP_ZERO);
 		if (!pages) {
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(bo->base.base.resv);
 			return -ENOMEM;
 		}
 
@@ -56,13 +56,13 @@  int lima_heap_alloc(struct lima_bo *bo, struct lima_vm *vm)
 		struct page *page = shmem_read_mapping_page(mapping, i);
 
 		if (IS_ERR(page)) {
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(bo->base.base.resv);
 			return PTR_ERR(page);
 		}
 		pages[i] = page;
 	}
 
-	mutex_unlock(&bo->base.pages_lock);
+	dma_resv_unlock(bo->base.base.resv);
 
 	ret = sg_alloc_table_from_pages(&sgt, pages, i, 0,
 					new_size, GFP_KERNEL);
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index d3f82b26a631..404b8f67e2df 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -424,6 +424,7 @@  static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 	struct panfrost_gem_mapping *bomapping;
 	struct panfrost_gem_object *bo;
 	struct address_space *mapping;
+	struct drm_gem_object *obj;
 	pgoff_t page_offset;
 	struct sg_table *sgt;
 	struct page **pages;
@@ -446,13 +447,15 @@  static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 	page_offset = addr >> PAGE_SHIFT;
 	page_offset -= bomapping->mmnode.start;
 
-	mutex_lock(&bo->base.pages_lock);
+	obj = &bo->base.base;
+
+	dma_resv_lock(obj->resv, NULL);
 
 	if (!bo->base.pages) {
 		bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
 				     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
 		if (!bo->sgts) {
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(obj->resv);
 			ret = -ENOMEM;
 			goto err_bo;
 		}
@@ -462,7 +465,7 @@  static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 		if (!pages) {
 			kvfree(bo->sgts);
 			bo->sgts = NULL;
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(obj->resv);
 			ret = -ENOMEM;
 			goto err_bo;
 		}
@@ -472,7 +475,7 @@  static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 		pages = bo->base.pages;
 		if (pages[page_offset]) {
 			/* Pages are already mapped, bail out. */
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(obj->resv);
 			goto out;
 		}
 	}
@@ -483,13 +486,13 @@  static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 	for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
 		pages[i] = shmem_read_mapping_page(mapping, i);
 		if (IS_ERR(pages[i])) {
-			mutex_unlock(&bo->base.pages_lock);
+			dma_resv_unlock(obj->resv);
 			ret = PTR_ERR(pages[i]);
 			goto err_pages;
 		}
 	}
 
-	mutex_unlock(&bo->base.pages_lock);
+	dma_resv_unlock(obj->resv);
 
 	sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
 	ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index d0a57853c188..70889533962a 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -26,11 +26,6 @@  struct drm_gem_shmem_object {
 	 */
 	struct drm_gem_object base;
 
-	/**
-	 * @pages_lock: Protects the page table and use count
-	 */
-	struct mutex pages_lock;
-
 	/**
 	 * @pages: Page table
 	 */
@@ -79,11 +74,6 @@  struct drm_gem_shmem_object {
 	 */
 	struct sg_table *sgt;
 
-	/**
-	 * @vmap_lock: Protects the vmap address and use count
-	 */
-	struct mutex vmap_lock;
-
 	/**
 	 * @vaddr: Kernel virtual address of the backing memory
 	 */