diff mbox series

[4/7] drm/radeon: Pin buffers while they are vmap'ed

Message ID 20201112132117.27228-5-tzimmermann@suse.de (mailing list archive)
State New, archived
Headers show
Series drm/radeon: Convert to generic fbdev emulation | expand

Commit Message

Thomas Zimmermann Nov. 12, 2020, 1:21 p.m. UTC
In order to avoid eviction of vmap'ed buffers, pin them in their GEM
object's vmap implementation. Unpin them in the vunmap implementation.
This is needed to make generic fbdev support work reliably. Without,
the buffer object could be evicted while fbdev flushed its shadow buffer.

In difference to the PRIME pin/unpin functions, the vmap code does not
modify the BOs prime_shared_count, so a vmap-pinned BO does not count as
shared.

The actual pin location is not important as the vmap call returns
information on how to access the buffer. Callers that require a
specific location should explicitly pin the BO before vmapping it.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/radeon/radeon_gem.c | 51 +++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

Comments

Christian König Nov. 12, 2020, 5:16 p.m. UTC | #1
Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
> object's vmap implementation. Unpin them in the vunmap implementation.
> This is needed to make generic fbdev support work reliably. Without,
> the buffer object could be evicted while fbdev flushed its shadow buffer.
>
> In difference to the PRIME pin/unpin functions, the vmap code does not
> modify the BOs prime_shared_count, so a vmap-pinned BO does not count as
> shared.
>
> The actual pin location is not important as the vmap call returns
> information on how to access the buffer. Callers that require a
> specific location should explicitly pin the BO before vmapping it.

Well is the buffer supposed to be scanned out?

If yes then the pin location is actually rather important since the 
hardware can only scan out from VRAM.

Regards,
Christian.

>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> ---
>   drivers/gpu/drm/radeon/radeon_gem.c | 51 +++++++++++++++++++++++++++--
>   1 file changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index d2876ce3bc9e..eaf7fc9a7b07 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r)
>   	return r;
>   }
>   
> +static int radeon_gem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
> +{
> +	static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
> +					   RADEON_GEM_DOMAIN_GTT |
> +					   RADEON_GEM_DOMAIN_CPU;
> +
> +	struct radeon_bo *bo = gem_to_radeon_bo(obj);
> +	int ret;
> +
> +	ret = radeon_bo_reserve(bo, false);
> +	if (ret)
> +		return ret;
> +
> +	/* pin buffer at its current location */
> +	ret = radeon_bo_pin(bo, any_domain, NULL);
> +	if (ret)
> +		goto err_radeon_bo_unreserve;
> +
> +	ret = drm_gem_ttm_vmap(obj, map);
> +	if (ret)
> +		goto err_radeon_bo_unpin;
> +
> +	radeon_bo_unreserve(bo);
> +
> +	return 0;
> +
> +err_radeon_bo_unpin:
> +	radeon_bo_unpin(bo);
> +err_radeon_bo_unreserve:
> +	radeon_bo_unreserve(bo);
> +	return ret;
> +}
> +
> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
> +{
> +	struct radeon_bo *bo = gem_to_radeon_bo(obj);
> +	int ret;
> +
> +	ret = radeon_bo_reserve(bo, false);
> +	if (ret)
> +		return;
> +
> +	drm_gem_ttm_vunmap(obj, map);
> +	radeon_bo_unpin(bo);
> +	radeon_bo_unreserve(bo);
> +}
> +
>   static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>   	.free = radeon_gem_object_free,
>   	.open = radeon_gem_object_open,
> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>   	.pin = radeon_gem_prime_pin,
>   	.unpin = radeon_gem_prime_unpin,
>   	.get_sg_table = radeon_gem_prime_get_sg_table,
> -	.vmap = drm_gem_ttm_vmap,
> -	.vunmap = drm_gem_ttm_vunmap,
> +	.vmap = radeon_gem_object_vmap,
> +	.vunmap = radeon_gem_object_vunmap,
>   };
>   
>   /*
Thomas Zimmermann Nov. 13, 2020, 7:59 a.m. UTC | #2
Hi Christian

Am 12.11.20 um 18:16 schrieb Christian König:
> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>> object's vmap implementation. Unpin them in the vunmap implementation.
>> This is needed to make generic fbdev support work reliably. Without,
>> the buffer object could be evicted while fbdev flushed its shadow buffer.
>>
>> In difference to the PRIME pin/unpin functions, the vmap code does not
>> modify the BOs prime_shared_count, so a vmap-pinned BO does not count as
>> shared.
>>
>> The actual pin location is not important as the vmap call returns
>> information on how to access the buffer. Callers that require a
>> specific location should explicitly pin the BO before vmapping it.
> 
> Well is the buffer supposed to be scanned out?

No, not by the fbdev helper.

> 
> If yes then the pin location is actually rather important since the
> hardware can only scan out from VRAM.

For relocatable BOs, fbdev uses a shadow buffer that makes all any
relocation transparent to userspace. It flushes the shadow fb into the
BO's memory if there are updates. The code is in
drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
call now pins the BO to wherever it is. The actual location does not
matter. It's vunmap'ed immediately afterwards.

For dma-buf sharing, the regular procedure of pin + vmap still apply.
This should always move the BO into GTT-managed memory.

Best regards
Thomas

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/drm_fb_helper.c#n432

> 
> Regards,
> Christian.
> 
>>
>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>> ---
>>   drivers/gpu/drm/radeon/radeon_gem.c | 51 +++++++++++++++++++++++++++--
>>   1 file changed, 49 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>> b/drivers/gpu/drm/radeon/radeon_gem.c
>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>> radeon_device *rdev, int r)
>>       return r;
>>   }
>>   +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>> struct dma_buf_map *map)
>> +{
>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>> +                       RADEON_GEM_DOMAIN_GTT |
>> +                       RADEON_GEM_DOMAIN_CPU;
>> +
>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>> +    int ret;
>> +
>> +    ret = radeon_bo_reserve(bo, false);
>> +    if (ret)
>> +        return ret;
>> +
>> +    /* pin buffer at its current location */
>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>> +    if (ret)
>> +        goto err_radeon_bo_unreserve;
>> +
>> +    ret = drm_gem_ttm_vmap(obj, map);
>> +    if (ret)
>> +        goto err_radeon_bo_unpin;
>> +
>> +    radeon_bo_unreserve(bo);
>> +
>> +    return 0;
>> +
>> +err_radeon_bo_unpin:
>> +    radeon_bo_unpin(bo);
>> +err_radeon_bo_unreserve:
>> +    radeon_bo_unreserve(bo);
>> +    return ret;
>> +}
>> +
>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>> struct dma_buf_map *map)
>> +{
>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>> +    int ret;
>> +
>> +    ret = radeon_bo_reserve(bo, false);
>> +    if (ret)
>> +        return;
>> +
>> +    drm_gem_ttm_vunmap(obj, map);
>> +    radeon_bo_unpin(bo);
>> +    radeon_bo_unreserve(bo);
>> +}
>> +
>>   static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>>       .free = radeon_gem_object_free,
>>       .open = radeon_gem_object_open,
>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>> radeon_gem_object_funcs = {
>>       .pin = radeon_gem_prime_pin,
>>       .unpin = radeon_gem_prime_unpin,
>>       .get_sg_table = radeon_gem_prime_get_sg_table,
>> -    .vmap = drm_gem_ttm_vmap,
>> -    .vunmap = drm_gem_ttm_vunmap,
>> +    .vmap = radeon_gem_object_vmap,
>> +    .vunmap = radeon_gem_object_vunmap,
>>   };
>>     /*
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Nov. 13, 2020, 4:27 p.m. UTC | #3
Hi

Am 16.11.20 um 12:28 schrieb Christian König:
> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 12.11.20 um 18:16 schrieb Christian König:
>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>>> object's vmap implementation. Unpin them in the vunmap implementation.
>>>> This is needed to make generic fbdev support work reliably. Without,
>>>> the buffer object could be evicted while fbdev flushed its shadow
>>>> buffer.
>>>>
>>>> In difference to the PRIME pin/unpin functions, the vmap code does not
>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not
>>>> count as
>>>> shared.
>>>>
>>>> The actual pin location is not important as the vmap call returns
>>>> information on how to access the buffer. Callers that require a
>>>> specific location should explicitly pin the BO before vmapping it.
>>> Well is the buffer supposed to be scanned out?
>> No, not by the fbdev helper.
> 
> Ok in this case that should work.
> 
>>> If yes then the pin location is actually rather important since the
>>> hardware can only scan out from VRAM.
>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>> relocation transparent to userspace. It flushes the shadow fb into the
>> BO's memory if there are updates. The code is in
>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>> call now pins the BO to wherever it is. The actual location does not
>> matter. It's vunmap'ed immediately afterwards.
> 
> The problem is what happens when it is prepared for scanout, but can't
> be moved to VRAM because it is vmapped?
> 
> When the shadow is never scanned out that isn't a problem, but we need
> to keep that in mind.

If this is a problem is practice, it has never shown up with the drivers
that use it already.

I think here's a modeset lock somewhere that could serialize these
operations. The fbdev console is not double buffered, so there's no
frequent pageflipping; hence interference should be small.

Best regards
Thomas

> 
> Regards,
> Christian.
> 
>>
>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>> This should always move the BO into GTT-managed memory.
>>
>> Best regards
>> Thomas
>>
>> [1]
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0
>>
>>
>>> Regards,
>>> Christian.
>>>
>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51
>>>> +++++++++++++++++++++++++++--
>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>> radeon_device *rdev, int r)
>>>>        return r;
>>>>    }
>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>> +
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return ret;
>>>> +
>>>> +    /* pin buffer at its current location */
>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unreserve;
>>>> +
>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unpin;
>>>> +
>>>> +    radeon_bo_unreserve(bo);
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_radeon_bo_unpin:
>>>> +    radeon_bo_unpin(bo);
>>>> +err_radeon_bo_unreserve:
>>>> +    radeon_bo_unreserve(bo);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return;
>>>> +
>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>> +    radeon_bo_unpin(bo);
>>>> +    radeon_bo_unreserve(bo);
>>>> +}
>>>> +
>>>>    static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>>>>        .free = radeon_gem_object_free,
>>>>        .open = radeon_gem_object_open,
>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>> radeon_gem_object_funcs = {
>>>>        .pin = radeon_gem_prime_pin,
>>>>        .unpin = radeon_gem_prime_unpin,
>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>> -    .vmap = drm_gem_ttm_vmap,
>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>> +    .vmap = radeon_gem_object_vmap,
>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>    };
>>>>      /*
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0
>>>
>
Christian König Nov. 16, 2020, 11:28 a.m. UTC | #4
Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
> Hi Christian
>
> Am 12.11.20 um 18:16 schrieb Christian König:
>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>> object's vmap implementation. Unpin them in the vunmap implementation.
>>> This is needed to make generic fbdev support work reliably. Without,
>>> the buffer object could be evicted while fbdev flushed its shadow buffer.
>>>
>>> In difference to the PRIME pin/unpin functions, the vmap code does not
>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not count as
>>> shared.
>>>
>>> The actual pin location is not important as the vmap call returns
>>> information on how to access the buffer. Callers that require a
>>> specific location should explicitly pin the BO before vmapping it.
>> Well is the buffer supposed to be scanned out?
> No, not by the fbdev helper.

Ok in this case that should work.

>> If yes then the pin location is actually rather important since the
>> hardware can only scan out from VRAM.
> For relocatable BOs, fbdev uses a shadow buffer that makes all any
> relocation transparent to userspace. It flushes the shadow fb into the
> BO's memory if there are updates. The code is in
> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
> call now pins the BO to wherever it is. The actual location does not
> matter. It's vunmap'ed immediately afterwards.

The problem is what happens when it is prepared for scanout, but can't 
be moved to VRAM because it is vmapped?

When the shadow is never scanned out that isn't a problem, but we need 
to keep that in mind.

Regards,
Christian.

>
> For dma-buf sharing, the regular procedure of pin + vmap still apply.
> This should always move the BO into GTT-managed memory.
>
> Best regards
> Thomas
>
> [1]
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0
>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>> ---
>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 +++++++++++++++++++++++++++--
>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>> radeon_device *rdev, int r)
>>>        return r;
>>>    }
>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>> struct dma_buf_map *map)
>>> +{
>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>> +                       RADEON_GEM_DOMAIN_GTT |
>>> +                       RADEON_GEM_DOMAIN_CPU;
>>> +
>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>> +    int ret;
>>> +
>>> +    ret = radeon_bo_reserve(bo, false);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    /* pin buffer at its current location */
>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>> +    if (ret)
>>> +        goto err_radeon_bo_unreserve;
>>> +
>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>> +    if (ret)
>>> +        goto err_radeon_bo_unpin;
>>> +
>>> +    radeon_bo_unreserve(bo);
>>> +
>>> +    return 0;
>>> +
>>> +err_radeon_bo_unpin:
>>> +    radeon_bo_unpin(bo);
>>> +err_radeon_bo_unreserve:
>>> +    radeon_bo_unreserve(bo);
>>> +    return ret;
>>> +}
>>> +
>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>> struct dma_buf_map *map)
>>> +{
>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>> +    int ret;
>>> +
>>> +    ret = radeon_bo_reserve(bo, false);
>>> +    if (ret)
>>> +        return;
>>> +
>>> +    drm_gem_ttm_vunmap(obj, map);
>>> +    radeon_bo_unpin(bo);
>>> +    radeon_bo_unreserve(bo);
>>> +}
>>> +
>>>    static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>>>        .free = radeon_gem_object_free,
>>>        .open = radeon_gem_object_open,
>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>> radeon_gem_object_funcs = {
>>>        .pin = radeon_gem_prime_pin,
>>>        .unpin = radeon_gem_prime_unpin,
>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>> -    .vmap = drm_gem_ttm_vmap,
>>> -    .vunmap = drm_gem_ttm_vunmap,
>>> +    .vmap = radeon_gem_object_vmap,
>>> +    .vunmap = radeon_gem_object_vunmap,
>>>    };
>>>      /*
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0
Thomas Zimmermann Nov. 16, 2020, 8:07 p.m. UTC | #5
Hi

Am 16.11.20 um 12:28 schrieb Christian König:
> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 12.11.20 um 18:16 schrieb Christian König:
>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>>> object's vmap implementation. Unpin them in the vunmap implementation.
>>>> This is needed to make generic fbdev support work reliably. Without,
>>>> the buffer object could be evicted while fbdev flushed its shadow
>>>> buffer.
>>>>
>>>> In difference to the PRIME pin/unpin functions, the vmap code does not
>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not
>>>> count as
>>>> shared.
>>>>
>>>> The actual pin location is not important as the vmap call returns
>>>> information on how to access the buffer. Callers that require a
>>>> specific location should explicitly pin the BO before vmapping it.
>>> Well is the buffer supposed to be scanned out?
>> No, not by the fbdev helper.
> 
> Ok in this case that should work.
> 
>>> If yes then the pin location is actually rather important since the
>>> hardware can only scan out from VRAM.
>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>> relocation transparent to userspace. It flushes the shadow fb into the
>> BO's memory if there are updates. The code is in
>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>> call now pins the BO to wherever it is. The actual location does not
>> matter. It's vunmap'ed immediately afterwards.
> 
> The problem is what happens when it is prepared for scanout, but can't
> be moved to VRAM because it is vmapped?
> 
> When the shadow is never scanned out that isn't a problem, but we need
> to keep that in mind.

I sent out a patchset that addresses the issue in it's final patch. [1]
I'd appreciate your feedback. It also tested the patches with the
converted radeon driver.

Best regards
Thomas

[1] https://patchwork.freedesktop.org/series/83918/

> 
> Regards,
> Christian.
> 
>>
>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>> This should always move the BO into GTT-managed memory.
>>
>> Best regards
>> Thomas
>>
>> [1]
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0
>>
>>
>>> Regards,
>>> Christian.
>>>
>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51
>>>> +++++++++++++++++++++++++++--
>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>> radeon_device *rdev, int r)
>>>>        return r;
>>>>    }
>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>> +
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return ret;
>>>> +
>>>> +    /* pin buffer at its current location */
>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unreserve;
>>>> +
>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unpin;
>>>> +
>>>> +    radeon_bo_unreserve(bo);
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_radeon_bo_unpin:
>>>> +    radeon_bo_unpin(bo);
>>>> +err_radeon_bo_unreserve:
>>>> +    radeon_bo_unreserve(bo);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return;
>>>> +
>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>> +    radeon_bo_unpin(bo);
>>>> +    radeon_bo_unreserve(bo);
>>>> +}
>>>> +
>>>>    static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>>>>        .free = radeon_gem_object_free,
>>>>        .open = radeon_gem_object_open,
>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>> radeon_gem_object_funcs = {
>>>>        .pin = radeon_gem_prime_pin,
>>>>        .unpin = radeon_gem_prime_unpin,
>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>> -    .vmap = drm_gem_ttm_vmap,
>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>> +    .vmap = radeon_gem_object_vmap,
>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>    };
>>>>      /*
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0
>>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Nov. 24, 2020, 9:16 a.m. UTC | #6
Hi Christian

Am 16.11.20 um 12:28 schrieb Christian König:
> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 12.11.20 um 18:16 schrieb Christian König:
>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>>> object's vmap implementation. Unpin them in the vunmap implementation.
>>>> This is needed to make generic fbdev support work reliably. Without,
>>>> the buffer object could be evicted while fbdev flushed its shadow 
>>>> buffer.
>>>>
>>>> In difference to the PRIME pin/unpin functions, the vmap code does not
>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not 
>>>> count as
>>>> shared.
>>>>
>>>> The actual pin location is not important as the vmap call returns
>>>> information on how to access the buffer. Callers that require a
>>>> specific location should explicitly pin the BO before vmapping it.
>>> Well is the buffer supposed to be scanned out?
>> No, not by the fbdev helper.
> 
> Ok in this case that should work.
> 
>>> If yes then the pin location is actually rather important since the
>>> hardware can only scan out from VRAM.
>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>> relocation transparent to userspace. It flushes the shadow fb into the
>> BO's memory if there are updates. The code is in
>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>> call now pins the BO to wherever it is. The actual location does not
>> matter. It's vunmap'ed immediately afterwards.
> 
> The problem is what happens when it is prepared for scanout, but can't 
> be moved to VRAM because it is vmapped?
> 
> When the shadow is never scanned out that isn't a problem, but we need 
> to keep that in mind.
> 

I'd like ask for your suggestions before sending an update for this patch.

After the discussion about locking in fbdev, [1] I intended to replace 
the pin call with code that acquires the reservation lock.

First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but then 
wondered why ttm_bo_vmap() doe not acquire the lock internally? I'd 
expect that vmap/vunmap are close together and do not overlap for the 
same BO. Otherwise, acquiring the reservation lock would require another 
ref-counting variable or per-driver code.

Best regards
Thomas

[1] https://patchwork.freedesktop.org/patch/401088/?series=83918&rev=1

> Regards,
> Christian.
> 
>>
>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>> This should always move the BO into GTT-managed memory.
>>
>> Best regards
>> Thomas
>>
>> [1]
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0 
>>
>>
>>> Regards,
>>> Christian.
>>>
>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 
>>>> +++++++++++++++++++++++++++--
>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>> radeon_device *rdev, int r)
>>>>        return r;
>>>>    }
>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>> +
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return ret;
>>>> +
>>>> +    /* pin buffer at its current location */
>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unreserve;
>>>> +
>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>> +    if (ret)
>>>> +        goto err_radeon_bo_unpin;
>>>> +
>>>> +    radeon_bo_unreserve(bo);
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_radeon_bo_unpin:
>>>> +    radeon_bo_unpin(bo);
>>>> +err_radeon_bo_unreserve:
>>>> +    radeon_bo_unreserve(bo);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>> struct dma_buf_map *map)
>>>> +{
>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>> +    int ret;
>>>> +
>>>> +    ret = radeon_bo_reserve(bo, false);
>>>> +    if (ret)
>>>> +        return;
>>>> +
>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>> +    radeon_bo_unpin(bo);
>>>> +    radeon_bo_unreserve(bo);
>>>> +}
>>>> +
>>>>    static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
>>>>        .free = radeon_gem_object_free,
>>>>        .open = radeon_gem_object_open,
>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>> radeon_gem_object_funcs = {
>>>>        .pin = radeon_gem_prime_pin,
>>>>        .unpin = radeon_gem_prime_unpin,
>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>> -    .vmap = drm_gem_ttm_vmap,
>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>> +    .vmap = radeon_gem_object_vmap,
>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>    };
>>>>      /*
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0 
>>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Nov. 24, 2020, 11:30 a.m. UTC | #7
Am 24.11.20 um 10:16 schrieb Thomas Zimmermann:
> Hi Christian
>
> Am 16.11.20 um 12:28 schrieb Christian König:
>> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>>> Hi Christian
>>>
>>> Am 12.11.20 um 18:16 schrieb Christian König:
>>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>>>> object's vmap implementation. Unpin them in the vunmap 
>>>>> implementation.
>>>>> This is needed to make generic fbdev support work reliably. Without,
>>>>> the buffer object could be evicted while fbdev flushed its shadow 
>>>>> buffer.
>>>>>
>>>>> In difference to the PRIME pin/unpin functions, the vmap code does 
>>>>> not
>>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not 
>>>>> count as
>>>>> shared.
>>>>>
>>>>> The actual pin location is not important as the vmap call returns
>>>>> information on how to access the buffer. Callers that require a
>>>>> specific location should explicitly pin the BO before vmapping it.
>>>> Well is the buffer supposed to be scanned out?
>>> No, not by the fbdev helper.
>>
>> Ok in this case that should work.
>>
>>>> If yes then the pin location is actually rather important since the
>>>> hardware can only scan out from VRAM.
>>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>>> relocation transparent to userspace. It flushes the shadow fb into the
>>> BO's memory if there are updates. The code is in
>>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>>> call now pins the BO to wherever it is. The actual location does not
>>> matter. It's vunmap'ed immediately afterwards.
>>
>> The problem is what happens when it is prepared for scanout, but 
>> can't be moved to VRAM because it is vmapped?
>>
>> When the shadow is never scanned out that isn't a problem, but we 
>> need to keep that in mind.
>>
>
> I'd like ask for your suggestions before sending an update for this 
> patch.
>
> After the discussion about locking in fbdev, [1] I intended to replace 
> the pin call with code that acquires the reservation lock.

Yeah, that sounds like a good idea to me as well.

> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but then 
> wondered why ttm_bo_vmap() doe not acquire the lock internally? I'd 
> expect that vmap/vunmap are close together and do not overlap for the 
> same BO. 

We have use cases like the following during command submission:

1. lock
2. map
3. copy parts of the BO content somewhere else or patch it with 
additional information
4. unmap
5. submit BO to the hardware
6. add hardware fence to the BO to make sure it doesn't move
7. unlock

That use case won't be possible with vmap/vunmap if we move the 
lock/unlock into it and I hope to replace the kmap/kunmap functions with 
them in the near term.

> Otherwise, acquiring the reservation lock would require another 
> ref-counting variable or per-driver code.

Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper as 
you initially planned.

Regards,
Christian.

>
> Best regards
> Thomas
>
> [1] https://patchwork.freedesktop.org/patch/401088/?series=83918&rev=1
>
>> Regards,
>> Christian.
>>
>>>
>>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>>> This should always move the BO into GTT-managed memory.
>>>
>>> Best regards
>>> Thomas
>>>
>>> [1]
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0 
>>>
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>> ---
>>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 
>>>>> +++++++++++++++++++++++++++--
>>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>>> radeon_device *rdev, int r)
>>>>>        return r;
>>>>>    }
>>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>>> struct dma_buf_map *map)
>>>>> +{
>>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>>> +
>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>> +    if (ret)
>>>>> +        return ret;
>>>>> +
>>>>> +    /* pin buffer at its current location */
>>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>>> +    if (ret)
>>>>> +        goto err_radeon_bo_unreserve;
>>>>> +
>>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>>> +    if (ret)
>>>>> +        goto err_radeon_bo_unpin;
>>>>> +
>>>>> +    radeon_bo_unreserve(bo);
>>>>> +
>>>>> +    return 0;
>>>>> +
>>>>> +err_radeon_bo_unpin:
>>>>> +    radeon_bo_unpin(bo);
>>>>> +err_radeon_bo_unreserve:
>>>>> +    radeon_bo_unreserve(bo);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>>> struct dma_buf_map *map)
>>>>> +{
>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>> +    if (ret)
>>>>> +        return;
>>>>> +
>>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>>> +    radeon_bo_unpin(bo);
>>>>> +    radeon_bo_unreserve(bo);
>>>>> +}
>>>>> +
>>>>>    static const struct drm_gem_object_funcs 
>>>>> radeon_gem_object_funcs = {
>>>>>        .free = radeon_gem_object_free,
>>>>>        .open = radeon_gem_object_open,
>>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>>> radeon_gem_object_funcs = {
>>>>>        .pin = radeon_gem_prime_pin,
>>>>>        .unpin = radeon_gem_prime_unpin,
>>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>>> -    .vmap = drm_gem_ttm_vmap,
>>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>>> +    .vmap = radeon_gem_object_vmap,
>>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>>    };
>>>>>      /*
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0 
>>>>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Thomas Zimmermann Nov. 24, 2020, 11:44 a.m. UTC | #8
Hi

Am 24.11.20 um 12:30 schrieb Christian König:
> Am 24.11.20 um 10:16 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 16.11.20 um 12:28 schrieb Christian König:
>>> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>>>> Hi Christian
>>>>
>>>> Am 12.11.20 um 18:16 schrieb Christian König:
>>>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>>>> In order to avoid eviction of vmap'ed buffers, pin them in their GEM
>>>>>> object's vmap implementation. Unpin them in the vunmap 
>>>>>> implementation.
>>>>>> This is needed to make generic fbdev support work reliably. Without,
>>>>>> the buffer object could be evicted while fbdev flushed its shadow 
>>>>>> buffer.
>>>>>>
>>>>>> In difference to the PRIME pin/unpin functions, the vmap code does 
>>>>>> not
>>>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not 
>>>>>> count as
>>>>>> shared.
>>>>>>
>>>>>> The actual pin location is not important as the vmap call returns
>>>>>> information on how to access the buffer. Callers that require a
>>>>>> specific location should explicitly pin the BO before vmapping it.
>>>>> Well is the buffer supposed to be scanned out?
>>>> No, not by the fbdev helper.
>>>
>>> Ok in this case that should work.
>>>
>>>>> If yes then the pin location is actually rather important since the
>>>>> hardware can only scan out from VRAM.
>>>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>>>> relocation transparent to userspace. It flushes the shadow fb into the
>>>> BO's memory if there are updates. The code is in
>>>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>>>> call now pins the BO to wherever it is. The actual location does not
>>>> matter. It's vunmap'ed immediately afterwards.
>>>
>>> The problem is what happens when it is prepared for scanout, but 
>>> can't be moved to VRAM because it is vmapped?
>>>
>>> When the shadow is never scanned out that isn't a problem, but we 
>>> need to keep that in mind.
>>>
>>
>> I'd like ask for your suggestions before sending an update for this 
>> patch.
>>
>> After the discussion about locking in fbdev, [1] I intended to replace 
>> the pin call with code that acquires the reservation lock.
> 
> Yeah, that sounds like a good idea to me as well.
> 
>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but then 
>> wondered why ttm_bo_vmap() doe not acquire the lock internally? I'd 
>> expect that vmap/vunmap are close together and do not overlap for the 
>> same BO. 
> 
> We have use cases like the following during command submission:
> 
> 1. lock
> 2. map
> 3. copy parts of the BO content somewhere else or patch it with 
> additional information
> 4. unmap
> 5. submit BO to the hardware
> 6. add hardware fence to the BO to make sure it doesn't move
> 7. unlock
> 
> That use case won't be possible with vmap/vunmap if we move the 
> lock/unlock into it and I hope to replace the kmap/kunmap functions with 
> them in the near term.
> 
>> Otherwise, acquiring the reservation lock would require another 
>> ref-counting variable or per-driver code.
> 
> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper as 
> you initially planned.

Given your example above, step one would acquire the lock, and step two 
would also acquire the lock as part of the vmap implementation. Wouldn't 
this fail (At least during unmap or unlock steps) ?

Best regards
Thomas

> 
> Regards,
> Christian.
> 
>>
>> Best regards
>> Thomas
>>
>> [1] https://patchwork.freedesktop.org/patch/401088/?series=83918&rev=1
>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>>>> This should always move the BO into GTT-managed memory.
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>> [1]
>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0 
>>>>
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>>> ---
>>>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 
>>>>>> +++++++++++++++++++++++++++--
>>>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>>>> radeon_device *rdev, int r)
>>>>>>        return r;
>>>>>>    }
>>>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>>>> struct dma_buf_map *map)
>>>>>> +{
>>>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>>>> +
>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>> +    if (ret)
>>>>>> +        return ret;
>>>>>> +
>>>>>> +    /* pin buffer at its current location */
>>>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>>>> +    if (ret)
>>>>>> +        goto err_radeon_bo_unreserve;
>>>>>> +
>>>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>>>> +    if (ret)
>>>>>> +        goto err_radeon_bo_unpin;
>>>>>> +
>>>>>> +    radeon_bo_unreserve(bo);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +
>>>>>> +err_radeon_bo_unpin:
>>>>>> +    radeon_bo_unpin(bo);
>>>>>> +err_radeon_bo_unreserve:
>>>>>> +    radeon_bo_unreserve(bo);
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>>>> struct dma_buf_map *map)
>>>>>> +{
>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>> +    if (ret)
>>>>>> +        return;
>>>>>> +
>>>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>>>> +    radeon_bo_unpin(bo);
>>>>>> +    radeon_bo_unreserve(bo);
>>>>>> +}
>>>>>> +
>>>>>>    static const struct drm_gem_object_funcs 
>>>>>> radeon_gem_object_funcs = {
>>>>>>        .free = radeon_gem_object_free,
>>>>>>        .open = radeon_gem_object_open,
>>>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>>>> radeon_gem_object_funcs = {
>>>>>>        .pin = radeon_gem_prime_pin,
>>>>>>        .unpin = radeon_gem_prime_unpin,
>>>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>>>> -    .vmap = drm_gem_ttm_vmap,
>>>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>>>> +    .vmap = radeon_gem_object_vmap,
>>>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>>>    };
>>>>>>      /*
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel@lists.freedesktop.org
>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0 
>>>>>
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
>
Christian König Nov. 24, 2020, 11:54 a.m. UTC | #9
Am 24.11.20 um 12:44 schrieb Thomas Zimmermann:
> Hi
>
> Am 24.11.20 um 12:30 schrieb Christian König:
>> Am 24.11.20 um 10:16 schrieb Thomas Zimmermann:
>>> Hi Christian
>>>
>>> Am 16.11.20 um 12:28 schrieb Christian König:
>>>> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>>>>> Hi Christian
>>>>>
>>>>> Am 12.11.20 um 18:16 schrieb Christian König:
>>>>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>>>>> In order to avoid eviction of vmap'ed buffers, pin them in their 
>>>>>>> GEM
>>>>>>> object's vmap implementation. Unpin them in the vunmap 
>>>>>>> implementation.
>>>>>>> This is needed to make generic fbdev support work reliably. 
>>>>>>> Without,
>>>>>>> the buffer object could be evicted while fbdev flushed its 
>>>>>>> shadow buffer.
>>>>>>>
>>>>>>> In difference to the PRIME pin/unpin functions, the vmap code 
>>>>>>> does not
>>>>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not 
>>>>>>> count as
>>>>>>> shared.
>>>>>>>
>>>>>>> The actual pin location is not important as the vmap call returns
>>>>>>> information on how to access the buffer. Callers that require a
>>>>>>> specific location should explicitly pin the BO before vmapping it.
>>>>>> Well is the buffer supposed to be scanned out?
>>>>> No, not by the fbdev helper.
>>>>
>>>> Ok in this case that should work.
>>>>
>>>>>> If yes then the pin location is actually rather important since the
>>>>>> hardware can only scan out from VRAM.
>>>>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>>>>> relocation transparent to userspace. It flushes the shadow fb into 
>>>>> the
>>>>> BO's memory if there are updates. The code is in
>>>>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>>>>> call now pins the BO to wherever it is. The actual location does not
>>>>> matter. It's vunmap'ed immediately afterwards.
>>>>
>>>> The problem is what happens when it is prepared for scanout, but 
>>>> can't be moved to VRAM because it is vmapped?
>>>>
>>>> When the shadow is never scanned out that isn't a problem, but we 
>>>> need to keep that in mind.
>>>>
>>>
>>> I'd like ask for your suggestions before sending an update for this 
>>> patch.
>>>
>>> After the discussion about locking in fbdev, [1] I intended to 
>>> replace the pin call with code that acquires the reservation lock.
>>
>> Yeah, that sounds like a good idea to me as well.
>>
>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but then 
>>> wondered why ttm_bo_vmap() doe not acquire the lock internally? I'd 
>>> expect that vmap/vunmap are close together and do not overlap for 
>>> the same BO. 
>>
>> We have use cases like the following during command submission:
>>
>> 1. lock
>> 2. map
>> 3. copy parts of the BO content somewhere else or patch it with 
>> additional information
>> 4. unmap
>> 5. submit BO to the hardware
>> 6. add hardware fence to the BO to make sure it doesn't move
>> 7. unlock
>>
>> That use case won't be possible with vmap/vunmap if we move the 
>> lock/unlock into it and I hope to replace the kmap/kunmap functions 
>> with them in the near term.
>>
>>> Otherwise, acquiring the reservation lock would require another 
>>> ref-counting variable or per-driver code.
>>
>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper as 
>> you initially planned.
>
> Given your example above, step one would acquire the lock, and step 
> two would also acquire the lock as part of the vmap implementation. 
> Wouldn't this fail (At least during unmap or unlock steps) ?

Oh, so you want to nest them? No, that is a rather bad no-go.

You need to make sure that the lock is only taken from the FB path which 
wants to vmap the object.

Why don't you lock the GEM object from the caller in the generic FB 
implementation?

Regards,
Christian.

>
> Best regards
> Thomas
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Best regards
>>> Thomas
>>>
>>> [1] https://patchwork.freedesktop.org/patch/401088/?series=83918&rev=1
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>>>>> This should always move the BO into GTT-managed memory.
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>> [1]
>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0 
>>>>>
>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>>>> ---
>>>>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 
>>>>>>> +++++++++++++++++++++++++++--
>>>>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>>>>> radeon_device *rdev, int r)
>>>>>>>        return r;
>>>>>>>    }
>>>>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>>>>> struct dma_buf_map *map)
>>>>>>> +{
>>>>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>>>>> +
>>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>>> +    if (ret)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    /* pin buffer at its current location */
>>>>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>>>>> +    if (ret)
>>>>>>> +        goto err_radeon_bo_unreserve;
>>>>>>> +
>>>>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>>>>> +    if (ret)
>>>>>>> +        goto err_radeon_bo_unpin;
>>>>>>> +
>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +
>>>>>>> +err_radeon_bo_unpin:
>>>>>>> +    radeon_bo_unpin(bo);
>>>>>>> +err_radeon_bo_unreserve:
>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>>>>> struct dma_buf_map *map)
>>>>>>> +{
>>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>>> +    if (ret)
>>>>>>> +        return;
>>>>>>> +
>>>>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>>>>> +    radeon_bo_unpin(bo);
>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>> +}
>>>>>>> +
>>>>>>>    static const struct drm_gem_object_funcs 
>>>>>>> radeon_gem_object_funcs = {
>>>>>>>        .free = radeon_gem_object_free,
>>>>>>>        .open = radeon_gem_object_open,
>>>>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>>>>> radeon_gem_object_funcs = {
>>>>>>>        .pin = radeon_gem_prime_pin,
>>>>>>>        .unpin = radeon_gem_prime_unpin,
>>>>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>>>>> -    .vmap = drm_gem_ttm_vmap,
>>>>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>>>>> +    .vmap = radeon_gem_object_vmap,
>>>>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>>>>    };
>>>>>>>      /*
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0 
>>>>>>
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>
>>
>
Thomas Zimmermann Nov. 24, 2020, 12:15 p.m. UTC | #10
Hi

Am 24.11.20 um 12:54 schrieb Christian König:
> Am 24.11.20 um 12:44 schrieb Thomas Zimmermann:
>> Hi
>>
>> Am 24.11.20 um 12:30 schrieb Christian König:
>>> Am 24.11.20 um 10:16 schrieb Thomas Zimmermann:
>>>> Hi Christian
>>>>
>>>> Am 16.11.20 um 12:28 schrieb Christian König:
>>>>> Am 13.11.20 um 08:59 schrieb Thomas Zimmermann:
>>>>>> Hi Christian
>>>>>>
>>>>>> Am 12.11.20 um 18:16 schrieb Christian König:
>>>>>>> Am 12.11.20 um 14:21 schrieb Thomas Zimmermann:
>>>>>>>> In order to avoid eviction of vmap'ed buffers, pin them in their 
>>>>>>>> GEM
>>>>>>>> object's vmap implementation. Unpin them in the vunmap 
>>>>>>>> implementation.
>>>>>>>> This is needed to make generic fbdev support work reliably. 
>>>>>>>> Without,
>>>>>>>> the buffer object could be evicted while fbdev flushed its 
>>>>>>>> shadow buffer.
>>>>>>>>
>>>>>>>> In difference to the PRIME pin/unpin functions, the vmap code 
>>>>>>>> does not
>>>>>>>> modify the BOs prime_shared_count, so a vmap-pinned BO does not 
>>>>>>>> count as
>>>>>>>> shared.
>>>>>>>>
>>>>>>>> The actual pin location is not important as the vmap call returns
>>>>>>>> information on how to access the buffer. Callers that require a
>>>>>>>> specific location should explicitly pin the BO before vmapping it.
>>>>>>> Well is the buffer supposed to be scanned out?
>>>>>> No, not by the fbdev helper.
>>>>>
>>>>> Ok in this case that should work.
>>>>>
>>>>>>> If yes then the pin location is actually rather important since the
>>>>>>> hardware can only scan out from VRAM.
>>>>>> For relocatable BOs, fbdev uses a shadow buffer that makes all any
>>>>>> relocation transparent to userspace. It flushes the shadow fb into 
>>>>>> the
>>>>>> BO's memory if there are updates. The code is in
>>>>>> drm_fb_helper_dirty_work(). [1] During the flush operation, the vmap
>>>>>> call now pins the BO to wherever it is. The actual location does not
>>>>>> matter. It's vunmap'ed immediately afterwards.
>>>>>
>>>>> The problem is what happens when it is prepared for scanout, but 
>>>>> can't be moved to VRAM because it is vmapped?
>>>>>
>>>>> When the shadow is never scanned out that isn't a problem, but we 
>>>>> need to keep that in mind.
>>>>>
>>>>
>>>> I'd like ask for your suggestions before sending an update for this 
>>>> patch.
>>>>
>>>> After the discussion about locking in fbdev, [1] I intended to 
>>>> replace the pin call with code that acquires the reservation lock.
>>>
>>> Yeah, that sounds like a good idea to me as well.
>>>
>>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but then 
>>>> wondered why ttm_bo_vmap() doe not acquire the lock internally? I'd 
>>>> expect that vmap/vunmap are close together and do not overlap for 
>>>> the same BO. 
>>>
>>> We have use cases like the following during command submission:
>>>
>>> 1. lock
>>> 2. map
>>> 3. copy parts of the BO content somewhere else or patch it with 
>>> additional information
>>> 4. unmap
>>> 5. submit BO to the hardware
>>> 6. add hardware fence to the BO to make sure it doesn't move
>>> 7. unlock
>>>
>>> That use case won't be possible with vmap/vunmap if we move the 
>>> lock/unlock into it and I hope to replace the kmap/kunmap functions 
>>> with them in the near term.
>>>
>>>> Otherwise, acquiring the reservation lock would require another 
>>>> ref-counting variable or per-driver code.
>>>
>>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper as 
>>> you initially planned.
>>
>> Given your example above, step one would acquire the lock, and step 
>> two would also acquire the lock as part of the vmap implementation. 
>> Wouldn't this fail (At least during unmap or unlock steps) ?
> 
> Oh, so you want to nest them? No, that is a rather bad no-go.

I don't want to nest/overlap them. My question was whether that would be 
required. Apparently not.

While the console's BO is being set for scanout, it's protected from 
movement via the pin/unpin implementation, right? The driver does not 
acquire the resv lock for longer periods. I'm asking because this would 
prevent any console-buffer updates while the console is being displayed.

> 
> You need to make sure that the lock is only taken from the FB path which 
> wants to vmap the object.
> 
> Why don't you lock the GEM object from the caller in the generic FB 
> implementation?

With the current blitter code, it breaks abstraction. if vmap/vunmap 
hold the lock implicitly, things would be easier.

Sorry for the noob questions. I'm still trying to understand the 
implications of acquiring these locks.

Best regards
Thomas

> 
> Regards,
> Christian.
> 
>>
>> Best regards
>> Thomas
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>> [1] https://patchwork.freedesktop.org/patch/401088/?series=83918&rev=1
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> For dma-buf sharing, the regular procedure of pin + vmap still apply.
>>>>>> This should always move the BO into GTT-managed memory.
>>>>>>
>>>>>> Best regards
>>>>>> Thomas
>>>>>>
>>>>>> [1]
>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%23n432&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=RLauuAuXkcl0rXwWWJ%2FrKP%2BsCr2wAzU1ejGV1bnQ80w%3D&amp;reserved=0 
>>>>>>
>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>>>>>>>> ---
>>>>>>>>    drivers/gpu/drm/radeon/radeon_gem.c | 51 
>>>>>>>> +++++++++++++++++++++++++++--
>>>>>>>>    1 file changed, 49 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>>> index d2876ce3bc9e..eaf7fc9a7b07 100644
>>>>>>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>>>>>>> @@ -226,6 +226,53 @@ static int radeon_gem_handle_lockup(struct
>>>>>>>> radeon_device *rdev, int r)
>>>>>>>>        return r;
>>>>>>>>    }
>>>>>>>>    +static int radeon_gem_object_vmap(struct drm_gem_object *obj,
>>>>>>>> struct dma_buf_map *map)
>>>>>>>> +{
>>>>>>>> +    static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
>>>>>>>> +                       RADEON_GEM_DOMAIN_GTT |
>>>>>>>> +                       RADEON_GEM_DOMAIN_CPU;
>>>>>>>> +
>>>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>>>> +    if (ret)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    /* pin buffer at its current location */
>>>>>>>> +    ret = radeon_bo_pin(bo, any_domain, NULL);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto err_radeon_bo_unreserve;
>>>>>>>> +
>>>>>>>> +    ret = drm_gem_ttm_vmap(obj, map);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto err_radeon_bo_unpin;
>>>>>>>> +
>>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +
>>>>>>>> +err_radeon_bo_unpin:
>>>>>>>> +    radeon_bo_unpin(bo);
>>>>>>>> +err_radeon_bo_unreserve:
>>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void radeon_gem_object_vunmap(struct drm_gem_object *obj,
>>>>>>>> struct dma_buf_map *map)
>>>>>>>> +{
>>>>>>>> +    struct radeon_bo *bo = gem_to_radeon_bo(obj);
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    ret = radeon_bo_reserve(bo, false);
>>>>>>>> +    if (ret)
>>>>>>>> +        return;
>>>>>>>> +
>>>>>>>> +    drm_gem_ttm_vunmap(obj, map);
>>>>>>>> +    radeon_bo_unpin(bo);
>>>>>>>> +    radeon_bo_unreserve(bo);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>    static const struct drm_gem_object_funcs 
>>>>>>>> radeon_gem_object_funcs = {
>>>>>>>>        .free = radeon_gem_object_free,
>>>>>>>>        .open = radeon_gem_object_open,
>>>>>>>> @@ -234,8 +281,8 @@ static const struct drm_gem_object_funcs
>>>>>>>> radeon_gem_object_funcs = {
>>>>>>>>        .pin = radeon_gem_prime_pin,
>>>>>>>>        .unpin = radeon_gem_prime_unpin,
>>>>>>>>        .get_sg_table = radeon_gem_prime_get_sg_table,
>>>>>>>> -    .vmap = drm_gem_ttm_vmap,
>>>>>>>> -    .vunmap = drm_gem_ttm_vunmap,
>>>>>>>> +    .vmap = radeon_gem_object_vmap,
>>>>>>>> +    .vunmap = radeon_gem_object_vunmap,
>>>>>>>>    };
>>>>>>>>      /*
>>>>>>> _______________________________________________
>>>>>>> dri-devel mailing list
>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C31b890664ca7429fc45808d887aa0842%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408511650629569%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=h1U9Po83K7webxsiKpn3ZGFz9Fcg6SRkxtrXWZ1%2B%2FEc%3D&amp;reserved=0 
>>>>>>>
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>
>>>
>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Nov. 24, 2020, 1:36 p.m. UTC | #11
Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
> [SNIP]
>>>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but 
>>>>> then wondered why ttm_bo_vmap() doe not acquire the lock 
>>>>> internally? I'd expect that vmap/vunmap are close together and do 
>>>>> not overlap for the same BO. 
>>>>
>>>> We have use cases like the following during command submission:
>>>>
>>>> 1. lock
>>>> 2. map
>>>> 3. copy parts of the BO content somewhere else or patch it with 
>>>> additional information
>>>> 4. unmap
>>>> 5. submit BO to the hardware
>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>> 7. unlock
>>>>
>>>> That use case won't be possible with vmap/vunmap if we move the 
>>>> lock/unlock into it and I hope to replace the kmap/kunmap functions 
>>>> with them in the near term.
>>>>
>>>>> Otherwise, acquiring the reservation lock would require another 
>>>>> ref-counting variable or per-driver code.
>>>>
>>>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper 
>>>> as you initially planned.
>>>
>>> Given your example above, step one would acquire the lock, and step 
>>> two would also acquire the lock as part of the vmap implementation. 
>>> Wouldn't this fail (At least during unmap or unlock steps) ?
>>
>> Oh, so you want to nest them? No, that is a rather bad no-go.
>
> I don't want to nest/overlap them. My question was whether that would 
> be required. Apparently not.
>
> While the console's BO is being set for scanout, it's protected from 
> movement via the pin/unpin implementation, right?

Yes, correct.

> The driver does not acquire the resv lock for longer periods. I'm 
> asking because this would prevent any console-buffer updates while the 
> console is being displayed.

Correct as well, we only hold the lock for things like command 
submission, pinning, unpinning etc etc....

>
>>
>> You need to make sure that the lock is only taken from the FB path 
>> which wants to vmap the object.
>>
>> Why don't you lock the GEM object from the caller in the generic FB 
>> implementation?
>
> With the current blitter code, it breaks abstraction. if vmap/vunmap 
> hold the lock implicitly, things would be easier.

Do you have a link to the code?

Please note that the reservation lock you need to take here is part of 
the GEM object.

Usually we design things in the way that the code needs to take a lock 
which protects an object, then do some operations with the object and 
then release the lock again.

Having in the lock inside the operation can be done as well, but 
returning with it is kind of unusual design.

> Sorry for the noob questions. I'm still trying to understand the 
> implications of acquiring these locks.

Well this is the reservation lock of the GEM object we are talking about 
here. We need to take that for a couple of different operations, 
vmap/vunmap doesn't sound like a special case to me.

Regards,
Christian.

>
> Best regards
> Thomas
Thomas Zimmermann Nov. 24, 2020, 1:56 p.m. UTC | #12
Hi

Am 24.11.20 um 14:36 schrieb Christian König:
> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>> [SNIP]
>>>>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but 
>>>>>> then wondered why ttm_bo_vmap() doe not acquire the lock 
>>>>>> internally? I'd expect that vmap/vunmap are close together and do 
>>>>>> not overlap for the same BO. 
>>>>>
>>>>> We have use cases like the following during command submission:
>>>>>
>>>>> 1. lock
>>>>> 2. map
>>>>> 3. copy parts of the BO content somewhere else or patch it with 
>>>>> additional information
>>>>> 4. unmap
>>>>> 5. submit BO to the hardware
>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>> 7. unlock
>>>>>
>>>>> That use case won't be possible with vmap/vunmap if we move the 
>>>>> lock/unlock into it and I hope to replace the kmap/kunmap functions 
>>>>> with them in the near term.
>>>>>
>>>>>> Otherwise, acquiring the reservation lock would require another 
>>>>>> ref-counting variable or per-driver code.
>>>>>
>>>>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() helper 
>>>>> as you initially planned.
>>>>
>>>> Given your example above, step one would acquire the lock, and step 
>>>> two would also acquire the lock as part of the vmap implementation. 
>>>> Wouldn't this fail (At least during unmap or unlock steps) ?
>>>
>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>
>> I don't want to nest/overlap them. My question was whether that would 
>> be required. Apparently not.
>>
>> While the console's BO is being set for scanout, it's protected from 
>> movement via the pin/unpin implementation, right?
> 
> Yes, correct.
> 
>> The driver does not acquire the resv lock for longer periods. I'm 
>> asking because this would prevent any console-buffer updates while the 
>> console is being displayed.
> 
> Correct as well, we only hold the lock for things like command 
> submission, pinning, unpinning etc etc....
> 

Thanks for answering my questions.

>>
>>>
>>> You need to make sure that the lock is only taken from the FB path 
>>> which wants to vmap the object.
>>>
>>> Why don't you lock the GEM object from the caller in the generic FB 
>>> implementation?
>>
>> With the current blitter code, it breaks abstraction. if vmap/vunmap 
>> hold the lock implicitly, things would be easier.
> 
> Do you have a link to the code?

It's the damage blitter in the fbdev code. [1] While it flushes the 
shadow buffer into the BO, the BO has to be kept in place. I already 
changed it to lock struct drm_fb_helper.lock, but I don't think this is 
enough. TTM could still evict the BO concurrently.

There's no recursion taking place, so I guess the reservation lock could 
be acquired/release in drm_client_buffer_vmap/vunmap(), or a separate 
pair of DRM client functions could do the locking.

Best regards
Thomas

[1] 
https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394

> 
> Please note that the reservation lock you need to take here is part of 
> the GEM object.
> 
> Usually we design things in the way that the code needs to take a lock 
> which protects an object, then do some operations with the object and 
> then release the lock again.
> 
> Having in the lock inside the operation can be done as well, but 
> returning with it is kind of unusual design.
> 
>> Sorry for the noob questions. I'm still trying to understand the 
>> implications of acquiring these locks.
> 
> Well this is the reservation lock of the GEM object we are talking about 
> here. We need to take that for a couple of different operations, 
> vmap/vunmap doesn't sound like a special case to me.
> 
> Regards,
> Christian.
> 
>>
>> Best regards
>> Thomas
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Nov. 24, 2020, 2:06 p.m. UTC | #13
Am 24.11.20 um 14:56 schrieb Thomas Zimmermann:
> Hi
>
> Am 24.11.20 um 14:36 schrieb Christian König:
>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>> [SNIP]
>>>>>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but 
>>>>>>> then wondered why ttm_bo_vmap() doe not acquire the lock 
>>>>>>> internally? I'd expect that vmap/vunmap are close together and 
>>>>>>> do not overlap for the same BO. 
>>>>>>
>>>>>> We have use cases like the following during command submission:
>>>>>>
>>>>>> 1. lock
>>>>>> 2. map
>>>>>> 3. copy parts of the BO content somewhere else or patch it with 
>>>>>> additional information
>>>>>> 4. unmap
>>>>>> 5. submit BO to the hardware
>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>> 7. unlock
>>>>>>
>>>>>> That use case won't be possible with vmap/vunmap if we move the 
>>>>>> lock/unlock into it and I hope to replace the kmap/kunmap 
>>>>>> functions with them in the near term.
>>>>>>
>>>>>>> Otherwise, acquiring the reservation lock would require another 
>>>>>>> ref-counting variable or per-driver code.
>>>>>>
>>>>>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() 
>>>>>> helper as you initially planned.
>>>>>
>>>>> Given your example above, step one would acquire the lock, and 
>>>>> step two would also acquire the lock as part of the vmap 
>>>>> implementation. Wouldn't this fail (At least during unmap or 
>>>>> unlock steps) ?
>>>>
>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>
>>> I don't want to nest/overlap them. My question was whether that 
>>> would be required. Apparently not.
>>>
>>> While the console's BO is being set for scanout, it's protected from 
>>> movement via the pin/unpin implementation, right?
>>
>> Yes, correct.
>>
>>> The driver does not acquire the resv lock for longer periods. I'm 
>>> asking because this would prevent any console-buffer updates while 
>>> the console is being displayed.
>>
>> Correct as well, we only hold the lock for things like command 
>> submission, pinning, unpinning etc etc....
>>
>
> Thanks for answering my questions.
>
>>>
>>>>
>>>> You need to make sure that the lock is only taken from the FB path 
>>>> which wants to vmap the object.
>>>>
>>>> Why don't you lock the GEM object from the caller in the generic FB 
>>>> implementation?
>>>
>>> With the current blitter code, it breaks abstraction. if vmap/vunmap 
>>> hold the lock implicitly, things would be easier.
>>
>> Do you have a link to the code?
>
> It's the damage blitter in the fbdev code. [1] While it flushes the 
> shadow buffer into the BO, the BO has to be kept in place. I already 
> changed it to lock struct drm_fb_helper.lock, but I don't think this 
> is enough. TTM could still evict the BO concurrently.

Yeah, that's correct.

But I still don't fully understand the problem. You just need to change 
the code like this:

     mutex_lock(&fb_helper->lock);
     dma_resv_lock(buffer->gem->resv, NULL);

     ret = drm_client_buffer_vmap(buffer, &map);
     if (ret)
         goto out;

     dst = map;
     drm_fb_helper_damage_blit_real(fb_helper, clip, &dst);

     drm_client_buffer_vunmap(buffer);

out:
     dma_resv_unlock(buffer->gem->resv);
     mutex_unlock(&fb_helper->lock);


You could abstract that in drm_client functions as well, but I don't 
really see the value in that.

Regards,
Christian.

> There's no recursion taking place, so I guess the reservation lock 
> could be acquired/release in drm_client_buffer_vmap/vunmap(), or a 
> separate pair of DRM client functions could do the locking.
>
> Best regards
> Thomas
>
> [1] 
> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>
>>
>> Please note that the reservation lock you need to take here is part 
>> of the GEM object.
>>
>> Usually we design things in the way that the code needs to take a 
>> lock which protects an object, then do some operations with the 
>> object and then release the lock again.
>>
>> Having in the lock inside the operation can be done as well, but 
>> returning with it is kind of unusual design.
>>
>>> Sorry for the noob questions. I'm still trying to understand the 
>>> implications of acquiring these locks.
>>
>> Well this is the reservation lock of the GEM object we are talking 
>> about here. We need to take that for a couple of different 
>> operations, vmap/vunmap doesn't sound like a special case to me.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Best regards
>>> Thomas
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Daniel Vetter Nov. 24, 2020, 2:09 p.m. UTC | #14
On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
> Hi
> 
> Am 24.11.20 um 14:36 schrieb Christian König:
> > Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
> > > [SNIP]
> > > > > > > First I wanted to put this into
> > > > > > > drm_gem_ttm_vmap/vunmap(), but then wondered why
> > > > > > > ttm_bo_vmap() doe not acquire the lock internally?
> > > > > > > I'd expect that vmap/vunmap are close together and
> > > > > > > do not overlap for the same BO.
> > > > > > 
> > > > > > We have use cases like the following during command submission:
> > > > > > 
> > > > > > 1. lock
> > > > > > 2. map
> > > > > > 3. copy parts of the BO content somewhere else or patch
> > > > > > it with additional information
> > > > > > 4. unmap
> > > > > > 5. submit BO to the hardware
> > > > > > 6. add hardware fence to the BO to make sure it doesn't move
> > > > > > 7. unlock
> > > > > > 
> > > > > > That use case won't be possible with vmap/vunmap if we
> > > > > > move the lock/unlock into it and I hope to replace the
> > > > > > kmap/kunmap functions with them in the near term.
> > > > > > 
> > > > > > > Otherwise, acquiring the reservation lock would
> > > > > > > require another ref-counting variable or per-driver
> > > > > > > code.
> > > > > > 
> > > > > > Hui, why that? Just put this into
> > > > > > drm_gem_ttm_vmap/vunmap() helper as you initially
> > > > > > planned.
> > > > > 
> > > > > Given your example above, step one would acquire the lock,
> > > > > and step two would also acquire the lock as part of the vmap
> > > > > implementation. Wouldn't this fail (At least during unmap or
> > > > > unlock steps) ?
> > > > 
> > > > Oh, so you want to nest them? No, that is a rather bad no-go.
> > > 
> > > I don't want to nest/overlap them. My question was whether that
> > > would be required. Apparently not.
> > > 
> > > While the console's BO is being set for scanout, it's protected from
> > > movement via the pin/unpin implementation, right?
> > 
> > Yes, correct.
> > 
> > > The driver does not acquire the resv lock for longer periods. I'm
> > > asking because this would prevent any console-buffer updates while
> > > the console is being displayed.
> > 
> > Correct as well, we only hold the lock for things like command
> > submission, pinning, unpinning etc etc....
> > 
> 
> Thanks for answering my questions.
> 
> > > 
> > > > 
> > > > You need to make sure that the lock is only taken from the FB
> > > > path which wants to vmap the object.
> > > > 
> > > > Why don't you lock the GEM object from the caller in the generic
> > > > FB implementation?
> > > 
> > > With the current blitter code, it breaks abstraction. if vmap/vunmap
> > > hold the lock implicitly, things would be easier.
> > 
> > Do you have a link to the code?
> 
> It's the damage blitter in the fbdev code. [1] While it flushes the shadow
> buffer into the BO, the BO has to be kept in place. I already changed it to
> lock struct drm_fb_helper.lock, but I don't think this is enough. TTM could
> still evict the BO concurrently.

So I'm not sure this is actually a problem: ttm could try to concurrently
evict the buffer we pinned into vram, and then just skip to the next one.

Plus atm generic fbdev isn't used on any chip where we really care about
that last few mb of vram being useable for command submission (well atm
there's no driver using it).

Having the buffer pinned into system memory and trying to do a concurrent
modeset that tries to pull it in is the hard failure mode. And holding
fb_helper.lock fully prevents that.

So not really clear on what failure mode you're seeing here?

> There's no recursion taking place, so I guess the reservation lock could be
> acquired/release in drm_client_buffer_vmap/vunmap(), or a separate pair of
> DRM client functions could do the locking.

Given how this "do the right locking" is a can of worms (and I think it's
worse than what you dug out already) I think the fb_helper.lock hack is
perfectly good enough.

I'm also somewhat worried that starting to use dma_resv lock in generic
code, while many helpers/drivers still have their hand-rolled locking,
will make conversion over to dma_resv needlessly more complicated.
-Daniel

> 
> Best regards
> Thomas
> 
> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
> 
> > 
> > Please note that the reservation lock you need to take here is part of
> > the GEM object.
> > 
> > Usually we design things in the way that the code needs to take a lock
> > which protects an object, then do some operations with the object and
> > then release the lock again.
> > 
> > Having in the lock inside the operation can be done as well, but
> > returning with it is kind of unusual design.
> > 
> > > Sorry for the noob questions. I'm still trying to understand the
> > > implications of acquiring these locks.
> > 
> > Well this is the reservation lock of the GEM object we are talking about
> > here. We need to take that for a couple of different operations,
> > vmap/vunmap doesn't sound like a special case to me.
> > 
> > Regards,
> > Christian.
> > 
> > > 
> > > Best regards
> > > Thomas
> > 
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
Thomas Zimmermann Nov. 25, 2020, 8:28 a.m. UTC | #15
Hi

Am 24.11.20 um 15:06 schrieb Christian König:
> Am 24.11.20 um 14:56 schrieb Thomas Zimmermann:
>> Hi
>>
>> Am 24.11.20 um 14:36 schrieb Christian König:
>>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>>> [SNIP]
>>>>>>>> First I wanted to put this into drm_gem_ttm_vmap/vunmap(), but 
>>>>>>>> then wondered why ttm_bo_vmap() doe not acquire the lock 
>>>>>>>> internally? I'd expect that vmap/vunmap are close together and 
>>>>>>>> do not overlap for the same BO. 
>>>>>>>
>>>>>>> We have use cases like the following during command submission:
>>>>>>>
>>>>>>> 1. lock
>>>>>>> 2. map
>>>>>>> 3. copy parts of the BO content somewhere else or patch it with 
>>>>>>> additional information
>>>>>>> 4. unmap
>>>>>>> 5. submit BO to the hardware
>>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>>> 7. unlock
>>>>>>>
>>>>>>> That use case won't be possible with vmap/vunmap if we move the 
>>>>>>> lock/unlock into it and I hope to replace the kmap/kunmap 
>>>>>>> functions with them in the near term.
>>>>>>>
>>>>>>>> Otherwise, acquiring the reservation lock would require another 
>>>>>>>> ref-counting variable or per-driver code.
>>>>>>>
>>>>>>> Hui, why that? Just put this into drm_gem_ttm_vmap/vunmap() 
>>>>>>> helper as you initially planned.
>>>>>>
>>>>>> Given your example above, step one would acquire the lock, and 
>>>>>> step two would also acquire the lock as part of the vmap 
>>>>>> implementation. Wouldn't this fail (At least during unmap or 
>>>>>> unlock steps) ?
>>>>>
>>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>>
>>>> I don't want to nest/overlap them. My question was whether that 
>>>> would be required. Apparently not.
>>>>
>>>> While the console's BO is being set for scanout, it's protected from 
>>>> movement via the pin/unpin implementation, right?
>>>
>>> Yes, correct.
>>>
>>>> The driver does not acquire the resv lock for longer periods. I'm 
>>>> asking because this would prevent any console-buffer updates while 
>>>> the console is being displayed.
>>>
>>> Correct as well, we only hold the lock for things like command 
>>> submission, pinning, unpinning etc etc....
>>>
>>
>> Thanks for answering my questions.
>>
>>>>
>>>>>
>>>>> You need to make sure that the lock is only taken from the FB path 
>>>>> which wants to vmap the object.
>>>>>
>>>>> Why don't you lock the GEM object from the caller in the generic FB 
>>>>> implementation?
>>>>
>>>> With the current blitter code, it breaks abstraction. if vmap/vunmap 
>>>> hold the lock implicitly, things would be easier.
>>>
>>> Do you have a link to the code?
>>
>> It's the damage blitter in the fbdev code. [1] While it flushes the 
>> shadow buffer into the BO, the BO has to be kept in place. I already 
>> changed it to lock struct drm_fb_helper.lock, but I don't think this 
>> is enough. TTM could still evict the BO concurrently.
> 
> Yeah, that's correct.
> 
> But I still don't fully understand the problem. You just need to change 
> the code like this:
> 
>      mutex_lock(&fb_helper->lock);
>      dma_resv_lock(buffer->gem->resv, NULL);
> 
>      ret = drm_client_buffer_vmap(buffer, &map);
>      if (ret)
>          goto out;
> 
>      dst = map;
>      drm_fb_helper_damage_blit_real(fb_helper, clip, &dst);
> 
>      drm_client_buffer_vunmap(buffer);
> 
> out:
>      dma_resv_unlock(buffer->gem->resv);
>      mutex_unlock(&fb_helper->lock);
> 

Yes, that's the code I had in mind.

> 
> You could abstract that in drm_client functions as well, but I don't 
> really see the value in that.

The fbdev code tries hard to not use GEM directly, but to wrap 
everything behind client interfaces. I'm not sure if I like that, but 
for now I'd stick to this design.

Best regards
Thomas

> 
> Regards,
> Christian.
> 
>> There's no recursion taking place, so I guess the reservation lock 
>> could be acquired/release in drm_client_buffer_vmap/vunmap(), or a 
>> separate pair of DRM client functions could do the locking.
>>
>> Best regards
>> Thomas
>>
>> [1] 
>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394 
>>
>>
>>>
>>> Please note that the reservation lock you need to take here is part 
>>> of the GEM object.
>>>
>>> Usually we design things in the way that the code needs to take a 
>>> lock which protects an object, then do some operations with the 
>>> object and then release the lock again.
>>>
>>> Having in the lock inside the operation can be done as well, but 
>>> returning with it is kind of unusual design.
>>>
>>>> Sorry for the noob questions. I'm still trying to understand the 
>>>> implications of acquiring these locks.
>>>
>>> Well this is the reservation lock of the GEM object we are talking 
>>> about here. We need to take that for a couple of different 
>>> operations, vmap/vunmap doesn't sound like a special case to me.
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Best regards
>>>> Thomas
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Nov. 25, 2020, 8:37 a.m. UTC | #16
Hi

Am 24.11.20 um 15:09 schrieb Daniel Vetter:
> On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
>> Hi
>>
>> Am 24.11.20 um 14:36 schrieb Christian König:
>>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>>> [SNIP]
>>>>>>>> First I wanted to put this into
>>>>>>>> drm_gem_ttm_vmap/vunmap(), but then wondered why
>>>>>>>> ttm_bo_vmap() doe not acquire the lock internally?
>>>>>>>> I'd expect that vmap/vunmap are close together and
>>>>>>>> do not overlap for the same BO.
>>>>>>>
>>>>>>> We have use cases like the following during command submission:
>>>>>>>
>>>>>>> 1. lock
>>>>>>> 2. map
>>>>>>> 3. copy parts of the BO content somewhere else or patch
>>>>>>> it with additional information
>>>>>>> 4. unmap
>>>>>>> 5. submit BO to the hardware
>>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>>> 7. unlock
>>>>>>>
>>>>>>> That use case won't be possible with vmap/vunmap if we
>>>>>>> move the lock/unlock into it and I hope to replace the
>>>>>>> kmap/kunmap functions with them in the near term.
>>>>>>>
>>>>>>>> Otherwise, acquiring the reservation lock would
>>>>>>>> require another ref-counting variable or per-driver
>>>>>>>> code.
>>>>>>>
>>>>>>> Hui, why that? Just put this into
>>>>>>> drm_gem_ttm_vmap/vunmap() helper as you initially
>>>>>>> planned.
>>>>>>
>>>>>> Given your example above, step one would acquire the lock,
>>>>>> and step two would also acquire the lock as part of the vmap
>>>>>> implementation. Wouldn't this fail (At least during unmap or
>>>>>> unlock steps) ?
>>>>>
>>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>>
>>>> I don't want to nest/overlap them. My question was whether that
>>>> would be required. Apparently not.
>>>>
>>>> While the console's BO is being set for scanout, it's protected from
>>>> movement via the pin/unpin implementation, right?
>>>
>>> Yes, correct.
>>>
>>>> The driver does not acquire the resv lock for longer periods. I'm
>>>> asking because this would prevent any console-buffer updates while
>>>> the console is being displayed.
>>>
>>> Correct as well, we only hold the lock for things like command
>>> submission, pinning, unpinning etc etc....
>>>
>>
>> Thanks for answering my questions.
>>
>>>>
>>>>>
>>>>> You need to make sure that the lock is only taken from the FB
>>>>> path which wants to vmap the object.
>>>>>
>>>>> Why don't you lock the GEM object from the caller in the generic
>>>>> FB implementation?
>>>>
>>>> With the current blitter code, it breaks abstraction. if vmap/vunmap
>>>> hold the lock implicitly, things would be easier.
>>>
>>> Do you have a link to the code?
>>
>> It's the damage blitter in the fbdev code. [1] While it flushes the shadow
>> buffer into the BO, the BO has to be kept in place. I already changed it to
>> lock struct drm_fb_helper.lock, but I don't think this is enough. TTM could
>> still evict the BO concurrently.
> 
> So I'm not sure this is actually a problem: ttm could try to concurrently
> evict the buffer we pinned into vram, and then just skip to the next one.
> 
> Plus atm generic fbdev isn't used on any chip where we really care about
> that last few mb of vram being useable for command submission (well atm
> there's no driver using it).

Well, this is the patchset for radeon. If it works out, amdgpu and 
nouveau are natural next choices. Especially radeon and nouveau support 
cards with low- to medium-sized VRAM. The MiBs wasted on fbdev certainly 
matter.

> 
> Having the buffer pinned into system memory and trying to do a concurrent
> modeset that tries to pull it in is the hard failure mode. And holding
> fb_helper.lock fully prevents that.
> 
> So not really clear on what failure mode you're seeing here?

Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or Wayland 
is running.) The fbdev BO is a few MiBs and not in use, so TTM would 
want to evict it if memory gets tight.

What I have in mind is a concurrent modeset that requires the memory. 
If we do a concurrent damage blit without protecting against eviction, 
things go boom. Same for concurrent 3d graphics with textures, model 
data, etc.

Best regards
Thomas

> 
>> There's no recursion taking place, so I guess the reservation lock could be
>> acquired/release in drm_client_buffer_vmap/vunmap(), or a separate pair of
>> DRM client functions could do the locking.
> 
> Given how this "do the right locking" is a can of worms (and I think it's
> worse than what you dug out already) I think the fb_helper.lock hack is
> perfectly good enough.
> 
> I'm also somewhat worried that starting to use dma_resv lock in generic
> code, while many helpers/drivers still have their hand-rolled locking,
> will make conversion over to dma_resv needlessly more complicated.
> -Daniel
> 
>>
>> Best regards
>> Thomas
>>
>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>
>>>
>>> Please note that the reservation lock you need to take here is part of
>>> the GEM object.
>>>
>>> Usually we design things in the way that the code needs to take a lock
>>> which protects an object, then do some operations with the object and
>>> then release the lock again.
>>>
>>> Having in the lock inside the operation can be done as well, but
>>> returning with it is kind of unusual design.
>>>
>>>> Sorry for the noob questions. I'm still trying to understand the
>>>> implications of acquiring these locks.
>>>
>>> Well this is the reservation lock of the GEM object we are talking about
>>> here. We need to take that for a couple of different operations,
>>> vmap/vunmap doesn't sound like a special case to me.
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> Best regards
>>>> Thomas
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
>> -- 
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
> 
> 
> 
> 
> 
>
Christian König Nov. 25, 2020, 10:13 a.m. UTC | #17
Am 25.11.20 um 09:37 schrieb Thomas Zimmermann:
> Hi
>
> Am 24.11.20 um 15:09 schrieb Daniel Vetter:
>> On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
>>> Hi
>>>
>>> Am 24.11.20 um 14:36 schrieb Christian König:
>>>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>>>> [SNIP]
>>>>>>>>> First I wanted to put this into
>>>>>>>>> drm_gem_ttm_vmap/vunmap(), but then wondered why
>>>>>>>>> ttm_bo_vmap() doe not acquire the lock internally?
>>>>>>>>> I'd expect that vmap/vunmap are close together and
>>>>>>>>> do not overlap for the same BO.
>>>>>>>>
>>>>>>>> We have use cases like the following during command submission:
>>>>>>>>
>>>>>>>> 1. lock
>>>>>>>> 2. map
>>>>>>>> 3. copy parts of the BO content somewhere else or patch
>>>>>>>> it with additional information
>>>>>>>> 4. unmap
>>>>>>>> 5. submit BO to the hardware
>>>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>>>> 7. unlock
>>>>>>>>
>>>>>>>> That use case won't be possible with vmap/vunmap if we
>>>>>>>> move the lock/unlock into it and I hope to replace the
>>>>>>>> kmap/kunmap functions with them in the near term.
>>>>>>>>
>>>>>>>>> Otherwise, acquiring the reservation lock would
>>>>>>>>> require another ref-counting variable or per-driver
>>>>>>>>> code.
>>>>>>>>
>>>>>>>> Hui, why that? Just put this into
>>>>>>>> drm_gem_ttm_vmap/vunmap() helper as you initially
>>>>>>>> planned.
>>>>>>>
>>>>>>> Given your example above, step one would acquire the lock,
>>>>>>> and step two would also acquire the lock as part of the vmap
>>>>>>> implementation. Wouldn't this fail (At least during unmap or
>>>>>>> unlock steps) ?
>>>>>>
>>>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>>>
>>>>> I don't want to nest/overlap them. My question was whether that
>>>>> would be required. Apparently not.
>>>>>
>>>>> While the console's BO is being set for scanout, it's protected from
>>>>> movement via the pin/unpin implementation, right?
>>>>
>>>> Yes, correct.
>>>>
>>>>> The driver does not acquire the resv lock for longer periods. I'm
>>>>> asking because this would prevent any console-buffer updates while
>>>>> the console is being displayed.
>>>>
>>>> Correct as well, we only hold the lock for things like command
>>>> submission, pinning, unpinning etc etc....
>>>>
>>>
>>> Thanks for answering my questions.
>>>
>>>>>
>>>>>>
>>>>>> You need to make sure that the lock is only taken from the FB
>>>>>> path which wants to vmap the object.
>>>>>>
>>>>>> Why don't you lock the GEM object from the caller in the generic
>>>>>> FB implementation?
>>>>>
>>>>> With the current blitter code, it breaks abstraction. if vmap/vunmap
>>>>> hold the lock implicitly, things would be easier.
>>>>
>>>> Do you have a link to the code?
>>>
>>> It's the damage blitter in the fbdev code. [1] While it flushes the 
>>> shadow
>>> buffer into the BO, the BO has to be kept in place. I already 
>>> changed it to
>>> lock struct drm_fb_helper.lock, but I don't think this is enough. 
>>> TTM could
>>> still evict the BO concurrently.
>>
>> So I'm not sure this is actually a problem: ttm could try to 
>> concurrently
>> evict the buffer we pinned into vram, and then just skip to the next 
>> one.
>>
>> Plus atm generic fbdev isn't used on any chip where we really care about
>> that last few mb of vram being useable for command submission (well atm
>> there's no driver using it).
>
> Well, this is the patchset for radeon. If it works out, amdgpu and 
> nouveau are natural next choices. Especially radeon and nouveau 
> support cards with low- to medium-sized VRAM. The MiBs wasted on fbdev 
> certainly matter.
>
>>
>> Having the buffer pinned into system memory and trying to do a 
>> concurrent
>> modeset that tries to pull it in is the hard failure mode. And holding
>> fb_helper.lock fully prevents that.
>>
>> So not really clear on what failure mode you're seeing here?
>
> Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or 
> Wayland is running.) The fbdev BO is a few MiBs and not in use, so TTM 
> would want to evict it if memory gets tight.
>
> What I have in mind is a concurrent modeset that requires the memory. 
> If we do a concurrent damage blit without protecting against eviction, 
> things go boom. Same for concurrent 3d graphics with textures, model 
> data, etc.

Completely agree.

This needs proper lock protection of the memory mapped buffer. Relying 
on that some other code isn't run because we have some third part locks 
taken is not sufficient here.

Regards,
Christian.

>
> Best regards
> Thomas
>
>>
>>> There's no recursion taking place, so I guess the reservation lock 
>>> could be
>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a separate 
>>> pair of
>>> DRM client functions could do the locking.
>>
>> Given how this "do the right locking" is a can of worms (and I think 
>> it's
>> worse than what you dug out already) I think the fb_helper.lock hack is
>> perfectly good enough.
>>
>> I'm also somewhat worried that starting to use dma_resv lock in generic
>> code, while many helpers/drivers still have their hand-rolled locking,
>> will make conversion over to dma_resv needlessly more complicated.
>> -Daniel
>>
>>>
>>> Best regards
>>> Thomas
>>>
>>> [1] 
>>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>
>>>>
>>>> Please note that the reservation lock you need to take here is part of
>>>> the GEM object.
>>>>
>>>> Usually we design things in the way that the code needs to take a lock
>>>> which protects an object, then do some operations with the object and
>>>> then release the lock again.
>>>>
>>>> Having in the lock inside the operation can be done as well, but
>>>> returning with it is kind of unusual design.
>>>>
>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>> implications of acquiring these locks.
>>>>
>>>> Well this is the reservation lock of the GEM object we are talking 
>>>> about
>>>> here. We need to take that for a couple of different operations,
>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>
>>> -- 
>>> Thomas Zimmermann
>>> Graphics Driver Developer
>>> SUSE Software Solutions Germany GmbH
>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>> (HRB 36809, AG Nürnberg)
>>> Geschäftsführer: Felix Imendörffer
>>
>>
>>
>>
>>
>>
>
Daniel Vetter Nov. 25, 2020, 10:36 a.m. UTC | #18
On Wed, Nov 25, 2020 at 11:13:13AM +0100, Christian König wrote:
> Am 25.11.20 um 09:37 schrieb Thomas Zimmermann:
> > Hi
> > 
> > Am 24.11.20 um 15:09 schrieb Daniel Vetter:
> > > On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
> > > > Hi
> > > > 
> > > > Am 24.11.20 um 14:36 schrieb Christian König:
> > > > > Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
> > > > > > [SNIP]
> > > > > > > > > > First I wanted to put this into
> > > > > > > > > > drm_gem_ttm_vmap/vunmap(), but then wondered why
> > > > > > > > > > ttm_bo_vmap() doe not acquire the lock internally?
> > > > > > > > > > I'd expect that vmap/vunmap are close together and
> > > > > > > > > > do not overlap for the same BO.
> > > > > > > > > 
> > > > > > > > > We have use cases like the following during command submission:
> > > > > > > > > 
> > > > > > > > > 1. lock
> > > > > > > > > 2. map
> > > > > > > > > 3. copy parts of the BO content somewhere else or patch
> > > > > > > > > it with additional information
> > > > > > > > > 4. unmap
> > > > > > > > > 5. submit BO to the hardware
> > > > > > > > > 6. add hardware fence to the BO to make sure it doesn't move
> > > > > > > > > 7. unlock
> > > > > > > > > 
> > > > > > > > > That use case won't be possible with vmap/vunmap if we
> > > > > > > > > move the lock/unlock into it and I hope to replace the
> > > > > > > > > kmap/kunmap functions with them in the near term.
> > > > > > > > > 
> > > > > > > > > > Otherwise, acquiring the reservation lock would
> > > > > > > > > > require another ref-counting variable or per-driver
> > > > > > > > > > code.
> > > > > > > > > 
> > > > > > > > > Hui, why that? Just put this into
> > > > > > > > > drm_gem_ttm_vmap/vunmap() helper as you initially
> > > > > > > > > planned.
> > > > > > > > 
> > > > > > > > Given your example above, step one would acquire the lock,
> > > > > > > > and step two would also acquire the lock as part of the vmap
> > > > > > > > implementation. Wouldn't this fail (At least during unmap or
> > > > > > > > unlock steps) ?
> > > > > > > 
> > > > > > > Oh, so you want to nest them? No, that is a rather bad no-go.
> > > > > > 
> > > > > > I don't want to nest/overlap them. My question was whether that
> > > > > > would be required. Apparently not.
> > > > > > 
> > > > > > While the console's BO is being set for scanout, it's protected from
> > > > > > movement via the pin/unpin implementation, right?
> > > > > 
> > > > > Yes, correct.
> > > > > 
> > > > > > The driver does not acquire the resv lock for longer periods. I'm
> > > > > > asking because this would prevent any console-buffer updates while
> > > > > > the console is being displayed.
> > > > > 
> > > > > Correct as well, we only hold the lock for things like command
> > > > > submission, pinning, unpinning etc etc....
> > > > > 
> > > > 
> > > > Thanks for answering my questions.
> > > > 
> > > > > > 
> > > > > > > 
> > > > > > > You need to make sure that the lock is only taken from the FB
> > > > > > > path which wants to vmap the object.
> > > > > > > 
> > > > > > > Why don't you lock the GEM object from the caller in the generic
> > > > > > > FB implementation?
> > > > > > 
> > > > > > With the current blitter code, it breaks abstraction. if vmap/vunmap
> > > > > > hold the lock implicitly, things would be easier.
> > > > > 
> > > > > Do you have a link to the code?
> > > > 
> > > > It's the damage blitter in the fbdev code. [1] While it flushes
> > > > the shadow
> > > > buffer into the BO, the BO has to be kept in place. I already
> > > > changed it to
> > > > lock struct drm_fb_helper.lock, but I don't think this is
> > > > enough. TTM could
> > > > still evict the BO concurrently.
> > > 
> > > So I'm not sure this is actually a problem: ttm could try to
> > > concurrently
> > > evict the buffer we pinned into vram, and then just skip to the next
> > > one.
> > > 
> > > Plus atm generic fbdev isn't used on any chip where we really care about
> > > that last few mb of vram being useable for command submission (well atm
> > > there's no driver using it).
> > 
> > Well, this is the patchset for radeon. If it works out, amdgpu and
> > nouveau are natural next choices. Especially radeon and nouveau support
> > cards with low- to medium-sized VRAM. The MiBs wasted on fbdev certainly
> > matter.
> > 
> > > 
> > > Having the buffer pinned into system memory and trying to do a
> > > concurrent
> > > modeset that tries to pull it in is the hard failure mode. And holding
> > > fb_helper.lock fully prevents that.
> > > 
> > > So not really clear on what failure mode you're seeing here?
> > 
> > Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or Wayland
> > is running.) The fbdev BO is a few MiBs and not in use, so TTM would
> > want to evict it if memory gets tight.
> > 
> > What I have in mind is a concurrent modeset that requires the memory. If
> > we do a concurrent damage blit without protecting against eviction,
> > things go boom. Same for concurrent 3d graphics with textures, model
> > data, etc.
> 
> Completely agree.
> 
> This needs proper lock protection of the memory mapped buffer. Relying on
> that some other code isn't run because we have some third part locks taken
> is not sufficient here.

We are still protected by the pin count in this scenario. Plus, with
current drivers we always pin the fbdev buffer into vram, so occasionally
failing to move it out isn't a regression.

So I'm still not seeing how this can go boom.

Now long term it'd be nice to cut everything over to dma_resv locking, but
the issue there is that beyond ttm, none of the helpers (and few of the
drivers) use dma_resv. So this is a fairly big uphill battle. Quick
interim fix seems like the right solution to me.
-Daniel

> 
> Regards,
> Christian.
> 
> > 
> > Best regards
> > Thomas
> > 
> > > 
> > > > There's no recursion taking place, so I guess the reservation
> > > > lock could be
> > > > acquired/release in drm_client_buffer_vmap/vunmap(), or a
> > > > separate pair of
> > > > DRM client functions could do the locking.
> > > 
> > > Given how this "do the right locking" is a can of worms (and I think
> > > it's
> > > worse than what you dug out already) I think the fb_helper.lock hack is
> > > perfectly good enough.
> > > 
> > > I'm also somewhat worried that starting to use dma_resv lock in generic
> > > code, while many helpers/drivers still have their hand-rolled locking,
> > > will make conversion over to dma_resv needlessly more complicated.
> > > -Daniel
> > > 
> > > > 
> > > > Best regards
> > > > Thomas
> > > > 
> > > > [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
> > > > 
> > > > > 
> > > > > Please note that the reservation lock you need to take here is part of
> > > > > the GEM object.
> > > > > 
> > > > > Usually we design things in the way that the code needs to take a lock
> > > > > which protects an object, then do some operations with the object and
> > > > > then release the lock again.
> > > > > 
> > > > > Having in the lock inside the operation can be done as well, but
> > > > > returning with it is kind of unusual design.
> > > > > 
> > > > > > Sorry for the noob questions. I'm still trying to understand the
> > > > > > implications of acquiring these locks.
> > > > > 
> > > > > Well this is the reservation lock of the GEM object we are
> > > > > talking about
> > > > > here. We need to take that for a couple of different operations,
> > > > > vmap/vunmap doesn't sound like a special case to me.
> > > > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > > 
> > > > > > Best regards
> > > > > > Thomas
> > > > > 
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > > > 
> > > > -- 
> > > > Thomas Zimmermann
> > > > Graphics Driver Developer
> > > > SUSE Software Solutions Germany GmbH
> > > > Maxfeldstr. 5, 90409 Nürnberg, Germany
> > > > (HRB 36809, AG Nürnberg)
> > > > Geschäftsführer: Felix Imendörffer
> > > 
> > > 
> > > 
> > > 
> > > 
> > > 
> > 
>
Christian König Nov. 25, 2020, 10:57 a.m. UTC | #19
Am 25.11.20 um 11:36 schrieb Daniel Vetter:
> On Wed, Nov 25, 2020 at 11:13:13AM +0100, Christian König wrote:
>> Am 25.11.20 um 09:37 schrieb Thomas Zimmermann:
>>> Hi
>>>
>>> Am 24.11.20 um 15:09 schrieb Daniel Vetter:
>>>> On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
>>>>> Hi
>>>>>
>>>>> Am 24.11.20 um 14:36 schrieb Christian König:
>>>>>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>>>>>> [SNIP]
>>>>>>>>>>> First I wanted to put this into
>>>>>>>>>>> drm_gem_ttm_vmap/vunmap(), but then wondered why
>>>>>>>>>>> ttm_bo_vmap() doe not acquire the lock internally?
>>>>>>>>>>> I'd expect that vmap/vunmap are close together and
>>>>>>>>>>> do not overlap for the same BO.
>>>>>>>>>> We have use cases like the following during command submission:
>>>>>>>>>>
>>>>>>>>>> 1. lock
>>>>>>>>>> 2. map
>>>>>>>>>> 3. copy parts of the BO content somewhere else or patch
>>>>>>>>>> it with additional information
>>>>>>>>>> 4. unmap
>>>>>>>>>> 5. submit BO to the hardware
>>>>>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>>>>>> 7. unlock
>>>>>>>>>>
>>>>>>>>>> That use case won't be possible with vmap/vunmap if we
>>>>>>>>>> move the lock/unlock into it and I hope to replace the
>>>>>>>>>> kmap/kunmap functions with them in the near term.
>>>>>>>>>>
>>>>>>>>>>> Otherwise, acquiring the reservation lock would
>>>>>>>>>>> require another ref-counting variable or per-driver
>>>>>>>>>>> code.
>>>>>>>>>> Hui, why that? Just put this into
>>>>>>>>>> drm_gem_ttm_vmap/vunmap() helper as you initially
>>>>>>>>>> planned.
>>>>>>>>> Given your example above, step one would acquire the lock,
>>>>>>>>> and step two would also acquire the lock as part of the vmap
>>>>>>>>> implementation. Wouldn't this fail (At least during unmap or
>>>>>>>>> unlock steps) ?
>>>>>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>>>>> I don't want to nest/overlap them. My question was whether that
>>>>>>> would be required. Apparently not.
>>>>>>>
>>>>>>> While the console's BO is being set for scanout, it's protected from
>>>>>>> movement via the pin/unpin implementation, right?
>>>>>> Yes, correct.
>>>>>>
>>>>>>> The driver does not acquire the resv lock for longer periods. I'm
>>>>>>> asking because this would prevent any console-buffer updates while
>>>>>>> the console is being displayed.
>>>>>> Correct as well, we only hold the lock for things like command
>>>>>> submission, pinning, unpinning etc etc....
>>>>>>
>>>>> Thanks for answering my questions.
>>>>>
>>>>>>>> You need to make sure that the lock is only taken from the FB
>>>>>>>> path which wants to vmap the object.
>>>>>>>>
>>>>>>>> Why don't you lock the GEM object from the caller in the generic
>>>>>>>> FB implementation?
>>>>>>> With the current blitter code, it breaks abstraction. if vmap/vunmap
>>>>>>> hold the lock implicitly, things would be easier.
>>>>>> Do you have a link to the code?
>>>>> It's the damage blitter in the fbdev code. [1] While it flushes
>>>>> the shadow
>>>>> buffer into the BO, the BO has to be kept in place. I already
>>>>> changed it to
>>>>> lock struct drm_fb_helper.lock, but I don't think this is
>>>>> enough. TTM could
>>>>> still evict the BO concurrently.
>>>> So I'm not sure this is actually a problem: ttm could try to
>>>> concurrently
>>>> evict the buffer we pinned into vram, and then just skip to the next
>>>> one.
>>>>
>>>> Plus atm generic fbdev isn't used on any chip where we really care about
>>>> that last few mb of vram being useable for command submission (well atm
>>>> there's no driver using it).
>>> Well, this is the patchset for radeon. If it works out, amdgpu and
>>> nouveau are natural next choices. Especially radeon and nouveau support
>>> cards with low- to medium-sized VRAM. The MiBs wasted on fbdev certainly
>>> matter.
>>>
>>>> Having the buffer pinned into system memory and trying to do a
>>>> concurrent
>>>> modeset that tries to pull it in is the hard failure mode. And holding
>>>> fb_helper.lock fully prevents that.
>>>>
>>>> So not really clear on what failure mode you're seeing here?
>>> Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or Wayland
>>> is running.) The fbdev BO is a few MiBs and not in use, so TTM would
>>> want to evict it if memory gets tight.
>>>
>>> What I have in mind is a concurrent modeset that requires the memory. If
>>> we do a concurrent damage blit without protecting against eviction,
>>> things go boom. Same for concurrent 3d graphics with textures, model
>>> data, etc.
>> Completely agree.
>>
>> This needs proper lock protection of the memory mapped buffer. Relying on
>> that some other code isn't run because we have some third part locks taken
>> is not sufficient here.
> We are still protected by the pin count in this scenario. Plus, with
> current drivers we always pin the fbdev buffer into vram, so occasionally
> failing to move it out isn't a regression.
>
> So I'm still not seeing how this can go boom.

Well as far as I understand it the pin count is zero for this buffer in 
this case here :)

I might be wrong on this because I don't know the FB code at all, but 
Thomas seems to be pretty clear that this is the shadow buffer which is 
not scanned out from.

Regards,
Christian.

>
> Now long term it'd be nice to cut everything over to dma_resv locking, but
> the issue there is that beyond ttm, none of the helpers (and few of the
> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
> interim fix seems like the right solution to me.
> -Daniel
>
>> Regards,
>> Christian.
>>
>>> Best regards
>>> Thomas
>>>
>>>>> There's no recursion taking place, so I guess the reservation
>>>>> lock could be
>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>> separate pair of
>>>>> DRM client functions could do the locking.
>>>> Given how this "do the right locking" is a can of worms (and I think
>>>> it's
>>>> worse than what you dug out already) I think the fb_helper.lock hack is
>>>> perfectly good enough.
>>>>
>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
>>>> code, while many helpers/drivers still have their hand-rolled locking,
>>>> will make conversion over to dma_resv needlessly more complicated.
>>>> -Daniel
>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>>>
>>>>>> Please note that the reservation lock you need to take here is part of
>>>>>> the GEM object.
>>>>>>
>>>>>> Usually we design things in the way that the code needs to take a lock
>>>>>> which protects an object, then do some operations with the object and
>>>>>> then release the lock again.
>>>>>>
>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>> returning with it is kind of unusual design.
>>>>>>
>>>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>>>> implications of acquiring these locks.
>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>> talking about
>>>>>> here. We need to take that for a couple of different operations,
>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>> -- 
>>>>> Thomas Zimmermann
>>>>> Graphics Driver Developer
>>>>> SUSE Software Solutions Germany GmbH
>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>> (HRB 36809, AG Nürnberg)
>>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>>
>>>>
>>>>
>>>>
Thomas Zimmermann Nov. 25, 2020, 11:38 a.m. UTC | #20
Hi

Am 25.11.20 um 11:36 schrieb Daniel Vetter:
> On Wed, Nov 25, 2020 at 11:13:13AM +0100, Christian König wrote:
>> Am 25.11.20 um 09:37 schrieb Thomas Zimmermann:
>>> Hi
>>>
>>> Am 24.11.20 um 15:09 schrieb Daniel Vetter:
>>>> On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
>>>>> Hi
>>>>>
>>>>> Am 24.11.20 um 14:36 schrieb Christian König:
>>>>>> Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
>>>>>>> [SNIP]
>>>>>>>>>>> First I wanted to put this into
>>>>>>>>>>> drm_gem_ttm_vmap/vunmap(), but then wondered why
>>>>>>>>>>> ttm_bo_vmap() doe not acquire the lock internally?
>>>>>>>>>>> I'd expect that vmap/vunmap are close together and
>>>>>>>>>>> do not overlap for the same BO.
>>>>>>>>>>
>>>>>>>>>> We have use cases like the following during command submission:
>>>>>>>>>>
>>>>>>>>>> 1. lock
>>>>>>>>>> 2. map
>>>>>>>>>> 3. copy parts of the BO content somewhere else or patch
>>>>>>>>>> it with additional information
>>>>>>>>>> 4. unmap
>>>>>>>>>> 5. submit BO to the hardware
>>>>>>>>>> 6. add hardware fence to the BO to make sure it doesn't move
>>>>>>>>>> 7. unlock
>>>>>>>>>>
>>>>>>>>>> That use case won't be possible with vmap/vunmap if we
>>>>>>>>>> move the lock/unlock into it and I hope to replace the
>>>>>>>>>> kmap/kunmap functions with them in the near term.
>>>>>>>>>>
>>>>>>>>>>> Otherwise, acquiring the reservation lock would
>>>>>>>>>>> require another ref-counting variable or per-driver
>>>>>>>>>>> code.
>>>>>>>>>>
>>>>>>>>>> Hui, why that? Just put this into
>>>>>>>>>> drm_gem_ttm_vmap/vunmap() helper as you initially
>>>>>>>>>> planned.
>>>>>>>>>
>>>>>>>>> Given your example above, step one would acquire the lock,
>>>>>>>>> and step two would also acquire the lock as part of the vmap
>>>>>>>>> implementation. Wouldn't this fail (At least during unmap or
>>>>>>>>> unlock steps) ?
>>>>>>>>
>>>>>>>> Oh, so you want to nest them? No, that is a rather bad no-go.
>>>>>>>
>>>>>>> I don't want to nest/overlap them. My question was whether that
>>>>>>> would be required. Apparently not.
>>>>>>>
>>>>>>> While the console's BO is being set for scanout, it's protected from
>>>>>>> movement via the pin/unpin implementation, right?
>>>>>>
>>>>>> Yes, correct.
>>>>>>
>>>>>>> The driver does not acquire the resv lock for longer periods. I'm
>>>>>>> asking because this would prevent any console-buffer updates while
>>>>>>> the console is being displayed.
>>>>>>
>>>>>> Correct as well, we only hold the lock for things like command
>>>>>> submission, pinning, unpinning etc etc....
>>>>>>
>>>>>
>>>>> Thanks for answering my questions.
>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> You need to make sure that the lock is only taken from the FB
>>>>>>>> path which wants to vmap the object.
>>>>>>>>
>>>>>>>> Why don't you lock the GEM object from the caller in the generic
>>>>>>>> FB implementation?
>>>>>>>
>>>>>>> With the current blitter code, it breaks abstraction. if vmap/vunmap
>>>>>>> hold the lock implicitly, things would be easier.
>>>>>>
>>>>>> Do you have a link to the code?
>>>>>
>>>>> It's the damage blitter in the fbdev code. [1] While it flushes
>>>>> the shadow
>>>>> buffer into the BO, the BO has to be kept in place. I already
>>>>> changed it to
>>>>> lock struct drm_fb_helper.lock, but I don't think this is
>>>>> enough. TTM could
>>>>> still evict the BO concurrently.
>>>>
>>>> So I'm not sure this is actually a problem: ttm could try to
>>>> concurrently
>>>> evict the buffer we pinned into vram, and then just skip to the next
>>>> one.
>>>>
>>>> Plus atm generic fbdev isn't used on any chip where we really care about
>>>> that last few mb of vram being useable for command submission (well atm
>>>> there's no driver using it).
>>>
>>> Well, this is the patchset for radeon. If it works out, amdgpu and
>>> nouveau are natural next choices. Especially radeon and nouveau support
>>> cards with low- to medium-sized VRAM. The MiBs wasted on fbdev certainly
>>> matter.
>>>
>>>>
>>>> Having the buffer pinned into system memory and trying to do a
>>>> concurrent
>>>> modeset that tries to pull it in is the hard failure mode. And holding
>>>> fb_helper.lock fully prevents that.
>>>>
>>>> So not really clear on what failure mode you're seeing here?
>>>
>>> Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or Wayland
>>> is running.) The fbdev BO is a few MiBs and not in use, so TTM would
>>> want to evict it if memory gets tight.
>>>
>>> What I have in mind is a concurrent modeset that requires the memory. If
>>> we do a concurrent damage blit without protecting against eviction,
>>> things go boom. Same for concurrent 3d graphics with textures, model
>>> data, etc.
>>
>> Completely agree.
>>
>> This needs proper lock protection of the memory mapped buffer. Relying on
>> that some other code isn't run because we have some third part locks taken
>> is not sufficient here.
> 
> We are still protected by the pin count in this scenario. Plus, with
> current drivers we always pin the fbdev buffer into vram, so occasionally
> failing to move it out isn't a regression.

Why is this protected by the pin count? The counter should be zero in 
this scenario. Otherwise, we could not evict the fbdev BO on all those 
systems where that's a hard requirement (e.g., ast).

The pin count is currently maintained by the vmap implementation in vram 
helpers. Calling vmap is an implicit pin; calling vunmap is an implicit 
unpin. This prevents eviction in the damage worker. But now I was told 
than pinning is only for BOs that are controlled by userspace and 
internal users should acquire the resv lock. So vram helpers have to be 
fixed, actually.

In vram helpers, unmapping does not mean eviction. The unmap operation 
only marks the BO as unmappable. The real unmap happens when the 
eviction takes place. This avoids many modifications to the page tables. 
IOW an unpinned, unmapped BO will remain in VRAM until the memory is 
actually needed.

Best regards
Thomas

> 
> So I'm still not seeing how this can go boom.
> 
> Now long term it'd be nice to cut everything over to dma_resv locking, but
> the issue there is that beyond ttm, none of the helpers (and few of the
> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
> interim fix seems like the right solution to me.
> -Daniel
> 
>>
>> Regards,
>> Christian.
>>
>>>
>>> Best regards
>>> Thomas
>>>
>>>>
>>>>> There's no recursion taking place, so I guess the reservation
>>>>> lock could be
>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>> separate pair of
>>>>> DRM client functions could do the locking.
>>>>
>>>> Given how this "do the right locking" is a can of worms (and I think
>>>> it's
>>>> worse than what you dug out already) I think the fb_helper.lock hack is
>>>> perfectly good enough.
>>>>
>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
>>>> code, while many helpers/drivers still have their hand-rolled locking,
>>>> will make conversion over to dma_resv needlessly more complicated.
>>>> -Daniel
>>>>
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>>>
>>>>>>
>>>>>> Please note that the reservation lock you need to take here is part of
>>>>>> the GEM object.
>>>>>>
>>>>>> Usually we design things in the way that the code needs to take a lock
>>>>>> which protects an object, then do some operations with the object and
>>>>>> then release the lock again.
>>>>>>
>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>> returning with it is kind of unusual design.
>>>>>>
>>>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>>>> implications of acquiring these locks.
>>>>>>
>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>> talking about
>>>>>> here. We need to take that for a couple of different operations,
>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>>
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>
>>>>> -- 
>>>>> Thomas Zimmermann
>>>>> Graphics Driver Developer
>>>>> SUSE Software Solutions Germany GmbH
>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>> (HRB 36809, AG Nürnberg)
>>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>
>>
>
Daniel Vetter Nov. 25, 2020, 4:32 p.m. UTC | #21
On Wed, Nov 25, 2020 at 12:38:01PM +0100, Thomas Zimmermann wrote:
> Hi
> 
> Am 25.11.20 um 11:36 schrieb Daniel Vetter:
> > On Wed, Nov 25, 2020 at 11:13:13AM +0100, Christian König wrote:
> > > Am 25.11.20 um 09:37 schrieb Thomas Zimmermann:
> > > > Hi
> > > > 
> > > > Am 24.11.20 um 15:09 schrieb Daniel Vetter:
> > > > > On Tue, Nov 24, 2020 at 02:56:51PM +0100, Thomas Zimmermann wrote:
> > > > > > Hi
> > > > > > 
> > > > > > Am 24.11.20 um 14:36 schrieb Christian König:
> > > > > > > Am 24.11.20 um 13:15 schrieb Thomas Zimmermann:
> > > > > > > > [SNIP]
> > > > > > > > > > > > First I wanted to put this into
> > > > > > > > > > > > drm_gem_ttm_vmap/vunmap(), but then wondered why
> > > > > > > > > > > > ttm_bo_vmap() doe not acquire the lock internally?
> > > > > > > > > > > > I'd expect that vmap/vunmap are close together and
> > > > > > > > > > > > do not overlap for the same BO.
> > > > > > > > > > > 
> > > > > > > > > > > We have use cases like the following during command submission:
> > > > > > > > > > > 
> > > > > > > > > > > 1. lock
> > > > > > > > > > > 2. map
> > > > > > > > > > > 3. copy parts of the BO content somewhere else or patch
> > > > > > > > > > > it with additional information
> > > > > > > > > > > 4. unmap
> > > > > > > > > > > 5. submit BO to the hardware
> > > > > > > > > > > 6. add hardware fence to the BO to make sure it doesn't move
> > > > > > > > > > > 7. unlock
> > > > > > > > > > > 
> > > > > > > > > > > That use case won't be possible with vmap/vunmap if we
> > > > > > > > > > > move the lock/unlock into it and I hope to replace the
> > > > > > > > > > > kmap/kunmap functions with them in the near term.
> > > > > > > > > > > 
> > > > > > > > > > > > Otherwise, acquiring the reservation lock would
> > > > > > > > > > > > require another ref-counting variable or per-driver
> > > > > > > > > > > > code.
> > > > > > > > > > > 
> > > > > > > > > > > Hui, why that? Just put this into
> > > > > > > > > > > drm_gem_ttm_vmap/vunmap() helper as you initially
> > > > > > > > > > > planned.
> > > > > > > > > > 
> > > > > > > > > > Given your example above, step one would acquire the lock,
> > > > > > > > > > and step two would also acquire the lock as part of the vmap
> > > > > > > > > > implementation. Wouldn't this fail (At least during unmap or
> > > > > > > > > > unlock steps) ?
> > > > > > > > > 
> > > > > > > > > Oh, so you want to nest them? No, that is a rather bad no-go.
> > > > > > > > 
> > > > > > > > I don't want to nest/overlap them. My question was whether that
> > > > > > > > would be required. Apparently not.
> > > > > > > > 
> > > > > > > > While the console's BO is being set for scanout, it's protected from
> > > > > > > > movement via the pin/unpin implementation, right?
> > > > > > > 
> > > > > > > Yes, correct.
> > > > > > > 
> > > > > > > > The driver does not acquire the resv lock for longer periods. I'm
> > > > > > > > asking because this would prevent any console-buffer updates while
> > > > > > > > the console is being displayed.
> > > > > > > 
> > > > > > > Correct as well, we only hold the lock for things like command
> > > > > > > submission, pinning, unpinning etc etc....
> > > > > > > 
> > > > > > 
> > > > > > Thanks for answering my questions.
> > > > > > 
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > You need to make sure that the lock is only taken from the FB
> > > > > > > > > path which wants to vmap the object.
> > > > > > > > > 
> > > > > > > > > Why don't you lock the GEM object from the caller in the generic
> > > > > > > > > FB implementation?
> > > > > > > > 
> > > > > > > > With the current blitter code, it breaks abstraction. if vmap/vunmap
> > > > > > > > hold the lock implicitly, things would be easier.
> > > > > > > 
> > > > > > > Do you have a link to the code?
> > > > > > 
> > > > > > It's the damage blitter in the fbdev code. [1] While it flushes
> > > > > > the shadow
> > > > > > buffer into the BO, the BO has to be kept in place. I already
> > > > > > changed it to
> > > > > > lock struct drm_fb_helper.lock, but I don't think this is
> > > > > > enough. TTM could
> > > > > > still evict the BO concurrently.
> > > > > 
> > > > > So I'm not sure this is actually a problem: ttm could try to
> > > > > concurrently
> > > > > evict the buffer we pinned into vram, and then just skip to the next
> > > > > one.
> > > > > 
> > > > > Plus atm generic fbdev isn't used on any chip where we really care about
> > > > > that last few mb of vram being useable for command submission (well atm
> > > > > there's no driver using it).
> > > > 
> > > > Well, this is the patchset for radeon. If it works out, amdgpu and
> > > > nouveau are natural next choices. Especially radeon and nouveau support
> > > > cards with low- to medium-sized VRAM. The MiBs wasted on fbdev certainly
> > > > matter.
> > > > 
> > > > > 
> > > > > Having the buffer pinned into system memory and trying to do a
> > > > > concurrent
> > > > > modeset that tries to pull it in is the hard failure mode. And holding
> > > > > fb_helper.lock fully prevents that.
> > > > > 
> > > > > So not really clear on what failure mode you're seeing here?
> > > > 
> > > > Imagine the fbdev BO is in VRAM, but not pinned. (Maybe Xorg or Wayland
> > > > is running.) The fbdev BO is a few MiBs and not in use, so TTM would
> > > > want to evict it if memory gets tight.
> > > > 
> > > > What I have in mind is a concurrent modeset that requires the memory. If
> > > > we do a concurrent damage blit without protecting against eviction,
> > > > things go boom. Same for concurrent 3d graphics with textures, model
> > > > data, etc.
> > > 
> > > Completely agree.
> > > 
> > > This needs proper lock protection of the memory mapped buffer. Relying on
> > > that some other code isn't run because we have some third part locks taken
> > > is not sufficient here.
> > 
> > We are still protected by the pin count in this scenario. Plus, with
> > current drivers we always pin the fbdev buffer into vram, so occasionally
> > failing to move it out isn't a regression.
> 
> Why is this protected by the pin count? The counter should be zero in this
> scenario. Otherwise, we could not evict the fbdev BO on all those systems
> where that's a hard requirement (e.g., ast).

Ah yes, I mixed that up.

I guess full locking is required :-/ I'm not exactly sure how to make this
happen with the current plethora of helpers ... I think we need an
_locked version of vmap/vunmap callbacks in drm_gem_object_funcs.

And then document that if the callers of the _locked version wants a
permanent mapping, it also needs to pin it. Plus I guess ideally implement
the unlocked/permanent versions in terms of that, so that drivers only
have to implement one or the other.

That should give us at least some way forward to gradually conver all the
drivers and helpers over to dma_resv locking.
-Daniel

> The pin count is currently maintained by the vmap implementation in vram
> helpers. Calling vmap is an implicit pin; calling vunmap is an implicit
> unpin. This prevents eviction in the damage worker. But now I was told than
> pinning is only for BOs that are controlled by userspace and internal users
> should acquire the resv lock. So vram helpers have to be fixed, actually.
> 
> In vram helpers, unmapping does not mean eviction. The unmap operation only
> marks the BO as unmappable. The real unmap happens when the eviction takes
> place. This avoids many modifications to the page tables. IOW an unpinned,
> unmapped BO will remain in VRAM until the memory is actually needed.
> 
> Best regards
> Thomas
> 
> > 
> > So I'm still not seeing how this can go boom.
> > 
> > Now long term it'd be nice to cut everything over to dma_resv locking, but
> > the issue there is that beyond ttm, none of the helpers (and few of the
> > drivers) use dma_resv. So this is a fairly big uphill battle. Quick
> > interim fix seems like the right solution to me.
> > -Daniel
> > 
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > 
> > > > Best regards
> > > > Thomas
> > > > 
> > > > > 
> > > > > > There's no recursion taking place, so I guess the reservation
> > > > > > lock could be
> > > > > > acquired/release in drm_client_buffer_vmap/vunmap(), or a
> > > > > > separate pair of
> > > > > > DRM client functions could do the locking.
> > > > > 
> > > > > Given how this "do the right locking" is a can of worms (and I think
> > > > > it's
> > > > > worse than what you dug out already) I think the fb_helper.lock hack is
> > > > > perfectly good enough.
> > > > > 
> > > > > I'm also somewhat worried that starting to use dma_resv lock in generic
> > > > > code, while many helpers/drivers still have their hand-rolled locking,
> > > > > will make conversion over to dma_resv needlessly more complicated.
> > > > > -Daniel
> > > > > 
> > > > > > 
> > > > > > Best regards
> > > > > > Thomas
> > > > > > 
> > > > > > [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
> > > > > > 
> > > > > > > 
> > > > > > > Please note that the reservation lock you need to take here is part of
> > > > > > > the GEM object.
> > > > > > > 
> > > > > > > Usually we design things in the way that the code needs to take a lock
> > > > > > > which protects an object, then do some operations with the object and
> > > > > > > then release the lock again.
> > > > > > > 
> > > > > > > Having in the lock inside the operation can be done as well, but
> > > > > > > returning with it is kind of unusual design.
> > > > > > > 
> > > > > > > > Sorry for the noob questions. I'm still trying to understand the
> > > > > > > > implications of acquiring these locks.
> > > > > > > 
> > > > > > > Well this is the reservation lock of the GEM object we are
> > > > > > > talking about
> > > > > > > here. We need to take that for a couple of different operations,
> > > > > > > vmap/vunmap doesn't sound like a special case to me.
> > > > > > > 
> > > > > > > Regards,
> > > > > > > Christian.
> > > > > > > 
> > > > > > > > 
> > > > > > > > Best regards
> > > > > > > > Thomas
> > > > > > > 
> > > > > > > _______________________________________________
> > > > > > > dri-devel mailing list
> > > > > > > dri-devel@lists.freedesktop.org
> > > > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > > > > > 
> > > > > > -- 
> > > > > > Thomas Zimmermann
> > > > > > Graphics Driver Developer
> > > > > > SUSE Software Solutions Germany GmbH
> > > > > > Maxfeldstr. 5, 90409 Nürnberg, Germany
> > > > > > (HRB 36809, AG Nürnberg)
> > > > > > Geschäftsführer: Felix Imendörffer
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > 
> > > 
> > 
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
Thomas Zimmermann Nov. 26, 2020, 10:15 a.m. UTC | #22
Hi

Am 25.11.20 um 17:32 schrieb Daniel Vetter:
> [...]
> I guess full locking is required :-/ I'm not exactly sure how to make this
> happen with the current plethora of helpers ... I think we need an
> _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.

I think we might be able to get away without new callbacks.

I looked through the sources that implement and use vmap. All the 
implementations are without taking resv locks. For locking, we can wrap 
them in lock/unlock pairs. Having something like drm_gem_vmap_unlocked() 
that locks and vmaps should make this easy.

In terms of implementation, only vram helpers do a pin+map in their vmap 
code. And as I mentioned before, this is actually wrong. The pattern 
dates back to when the code was still in individual drivers. It's time 
to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.

Finally, there aren't that many users of vmap. A few drivers use it 
while blitting framebuffers into HW buffers and ast does some permanent 
mapping of the cursor BO. All this is trivial to turn into small pairs
of lock+vmap and vunmap+unlock.

That leaves generic fbdev. The shadow buffer is also trivial to fix, as 
outlined during this discussion.

The code for fbdev in hardware buffers is a special case. It vmaps the 
buffer during initialization and only vunmaps it during shutdown. As 
this has worked so far without locking, I'd leave it as it is and put a 
big comment next to is.

Hardware fbdev buffers is only required by few drivers; namely those 
that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work. 
We should consider to make the fbdev shadow buffer the default and have 
drivers opt-in for the hardware buffer, if they need it.

> 
> And then document that if the callers of the _locked version wants a
> permanent mapping, it also needs to pin it. Plus I guess ideally implement
> the unlocked/permanent versions in terms of that, so that drivers only
> have to implement one or the other.

For my understanding, pinning is only done in prepare_fb code. And ast 
pins its cursor BOs into vram. We should document to hols vmap/vunmap 
only for time and cover them with resv locks. Pinning is for cases where 
the hardware requires buffers in a special location, but does not 
protect against concurrent threat. I think those are the implicit rules 
already.

I updated the radeon patchset, where all this appears to be working well.

Best regards
Thomas

> 
> That should give us at least some way forward to gradually conver all the
> drivers and helpers over to dma_resv locking.
> -Daniel
> 
>> The pin count is currently maintained by the vmap implementation in vram
>> helpers. Calling vmap is an implicit pin; calling vunmap is an implicit
>> unpin. This prevents eviction in the damage worker. But now I was told than
>> pinning is only for BOs that are controlled by userspace and internal users
>> should acquire the resv lock. So vram helpers have to be fixed, actually.
>>
>> In vram helpers, unmapping does not mean eviction. The unmap operation only
>> marks the BO as unmappable. The real unmap happens when the eviction takes
>> place. This avoids many modifications to the page tables. IOW an unpinned,
>> unmapped BO will remain in VRAM until the memory is actually needed.
>>
>> Best regards
>> Thomas
>>
>>>
>>> So I'm still not seeing how this can go boom.
>>>
>>> Now long term it'd be nice to cut everything over to dma_resv locking, but
>>> the issue there is that beyond ttm, none of the helpers (and few of the
>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>> interim fix seems like the right solution to me.
>>> -Daniel
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>>>
>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>> lock could be
>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>> separate pair of
>>>>>>> DRM client functions could do the locking.
>>>>>>
>>>>>> Given how this "do the right locking" is a can of worms (and I think
>>>>>> it's
>>>>>> worse than what you dug out already) I think the fb_helper.lock hack is
>>>>>> perfectly good enough.
>>>>>>
>>>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
>>>>>> code, while many helpers/drivers still have their hand-rolled locking,
>>>>>> will make conversion over to dma_resv needlessly more complicated.
>>>>>> -Daniel
>>>>>>
>>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>>>
>>>>>>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>>>>>
>>>>>>>>
>>>>>>>> Please note that the reservation lock you need to take here is part of
>>>>>>>> the GEM object.
>>>>>>>>
>>>>>>>> Usually we design things in the way that the code needs to take a lock
>>>>>>>> which protects an object, then do some operations with the object and
>>>>>>>> then release the lock again.
>>>>>>>>
>>>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>
>>>>>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>>>>>> implications of acquiring these locks.
>>>>>>>>
>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>> talking about
>>>>>>>> here. We need to take that for a couple of different operations,
>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Best regards
>>>>>>>>> Thomas
>>>>>>>>
>>>>>>>> _______________________________________________
>>>>>>>> dri-devel mailing list
>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>>
>>>>>>> -- 
>>>>>>> Thomas Zimmermann
>>>>>>> Graphics Driver Developer
>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>> -- 
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
> 
> 
> 
> 
> 
>
Daniel Vetter Nov. 26, 2020, 11:04 a.m. UTC | #23
On Thu, Nov 26, 2020 at 11:15 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>
> Hi
>
> Am 25.11.20 um 17:32 schrieb Daniel Vetter:
> > [...]
> > I guess full locking is required :-/ I'm not exactly sure how to make this
> > happen with the current plethora of helpers ... I think we need an
> > _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.
>
> I think we might be able to get away without new callbacks.
>
> I looked through the sources that implement and use vmap. All the
> implementations are without taking resv locks. For locking, we can wrap
> them in lock/unlock pairs. Having something like drm_gem_vmap_unlocked()
> that locks and vmaps should make this easy.
>
> In terms of implementation, only vram helpers do a pin+map in their vmap
> code. And as I mentioned before, this is actually wrong. The pattern
> dates back to when the code was still in individual drivers. It's time
> to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.
>
> Finally, there aren't that many users of vmap. A few drivers use it
> while blitting framebuffers into HW buffers and ast does some permanent
> mapping of the cursor BO. All this is trivial to turn into small pairs
> of lock+vmap and vunmap+unlock.
>
> That leaves generic fbdev. The shadow buffer is also trivial to fix, as
> outlined during this discussion.
>
> The code for fbdev in hardware buffers is a special case. It vmaps the
> buffer during initialization and only vunmaps it during shutdown. As
> this has worked so far without locking, I'd leave it as it is and put a
> big comment next to is.
>
> Hardware fbdev buffers is only required by few drivers; namely those
> that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work.
> We should consider to make the fbdev shadow buffer the default and have
> drivers opt-in for the hardware buffer, if they need it.
>
> >
> > And then document that if the callers of the _locked version wants a
> > permanent mapping, it also needs to pin it. Plus I guess ideally implement
> > the unlocked/permanent versions in terms of that, so that drivers only
> > have to implement one or the other.
>
> For my understanding, pinning is only done in prepare_fb code. And ast
> pins its cursor BOs into vram. We should document to hols vmap/vunmap
> only for time and cover them with resv locks. Pinning is for cases where
> the hardware requires buffers in a special location, but does not
> protect against concurrent threat. I think those are the implicit rules
> already.
>
> I updated the radeon patchset, where all this appears to be working well.

Hm yeah if you want to do the full change, then that works out too.
It's just a pile of work.

But if we can finish off with an dma_resv_assert_locked in
dma_buf_vmap/vunmap, then I think that's ok. It does mean that
exporters must implement vmap caching, or performance will be
terrible. So quite some update for the dma-buf docs.

But if you're willing to do all that conversion of callers, then of
course I'm not stopping you. Not at all, it's great to see that kind
of maze untangled.
-Daniel

>
> Best regards
> Thomas
>
> >
> > That should give us at least some way forward to gradually conver all the
> > drivers and helpers over to dma_resv locking.
> > -Daniel
> >
> >> The pin count is currently maintained by the vmap implementation in vram
> >> helpers. Calling vmap is an implicit pin; calling vunmap is an implicit
> >> unpin. This prevents eviction in the damage worker. But now I was told than
> >> pinning is only for BOs that are controlled by userspace and internal users
> >> should acquire the resv lock. So vram helpers have to be fixed, actually.
> >>
> >> In vram helpers, unmapping does not mean eviction. The unmap operation only
> >> marks the BO as unmappable. The real unmap happens when the eviction takes
> >> place. This avoids many modifications to the page tables. IOW an unpinned,
> >> unmapped BO will remain in VRAM until the memory is actually needed.
> >>
> >> Best regards
> >> Thomas
> >>
> >>>
> >>> So I'm still not seeing how this can go boom.
> >>>
> >>> Now long term it'd be nice to cut everything over to dma_resv locking, but
> >>> the issue there is that beyond ttm, none of the helpers (and few of the
> >>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
> >>> interim fix seems like the right solution to me.
> >>> -Daniel
> >>>
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>>
> >>>>> Best regards
> >>>>> Thomas
> >>>>>
> >>>>>>
> >>>>>>> There's no recursion taking place, so I guess the reservation
> >>>>>>> lock could be
> >>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
> >>>>>>> separate pair of
> >>>>>>> DRM client functions could do the locking.
> >>>>>>
> >>>>>> Given how this "do the right locking" is a can of worms (and I think
> >>>>>> it's
> >>>>>> worse than what you dug out already) I think the fb_helper.lock hack is
> >>>>>> perfectly good enough.
> >>>>>>
> >>>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
> >>>>>> code, while many helpers/drivers still have their hand-rolled locking,
> >>>>>> will make conversion over to dma_resv needlessly more complicated.
> >>>>>> -Daniel
> >>>>>>
> >>>>>>>
> >>>>>>> Best regards
> >>>>>>> Thomas
> >>>>>>>
> >>>>>>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
> >>>>>>>
> >>>>>>>>
> >>>>>>>> Please note that the reservation lock you need to take here is part of
> >>>>>>>> the GEM object.
> >>>>>>>>
> >>>>>>>> Usually we design things in the way that the code needs to take a lock
> >>>>>>>> which protects an object, then do some operations with the object and
> >>>>>>>> then release the lock again.
> >>>>>>>>
> >>>>>>>> Having in the lock inside the operation can be done as well, but
> >>>>>>>> returning with it is kind of unusual design.
> >>>>>>>>
> >>>>>>>>> Sorry for the noob questions. I'm still trying to understand the
> >>>>>>>>> implications of acquiring these locks.
> >>>>>>>>
> >>>>>>>> Well this is the reservation lock of the GEM object we are
> >>>>>>>> talking about
> >>>>>>>> here. We need to take that for a couple of different operations,
> >>>>>>>> vmap/vunmap doesn't sound like a special case to me.
> >>>>>>>>
> >>>>>>>> Regards,
> >>>>>>>> Christian.
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>> Best regards
> >>>>>>>>> Thomas
> >>>>>>>>
> >>>>>>>> _______________________________________________
> >>>>>>>> dri-devel mailing list
> >>>>>>>> dri-devel@lists.freedesktop.org
> >>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >>>>>>>
> >>>>>>> --
> >>>>>>> Thomas Zimmermann
> >>>>>>> Graphics Driver Developer
> >>>>>>> SUSE Software Solutions Germany GmbH
> >>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >>>>>>> (HRB 36809, AG Nürnberg)
> >>>>>>> Geschäftsführer: Felix Imendörffer
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>
> >>>>
> >>>
> >>
> >> --
> >> Thomas Zimmermann
> >> Graphics Driver Developer
> >> SUSE Software Solutions Germany GmbH
> >> Maxfeldstr. 5, 90409 Nürnberg, Germany
> >> (HRB 36809, AG Nürnberg)
> >> Geschäftsführer: Felix Imendörffer
> >
> >
> >
> >
> >
> >
>
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
Christian König Nov. 26, 2020, 11:28 a.m. UTC | #24
Am 26.11.20 um 12:04 schrieb Daniel Vetter:
> On Thu, Nov 26, 2020 at 11:15 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>> Hi
>>
>> Am 25.11.20 um 17:32 schrieb Daniel Vetter:
>>> [...]
>>> I guess full locking is required :-/ I'm not exactly sure how to make this
>>> happen with the current plethora of helpers ... I think we need an
>>> _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.
>> I think we might be able to get away without new callbacks.
>>
>> I looked through the sources that implement and use vmap. All the
>> implementations are without taking resv locks. For locking, we can wrap
>> them in lock/unlock pairs. Having something like drm_gem_vmap_unlocked()
>> that locks and vmaps should make this easy.
>>
>> In terms of implementation, only vram helpers do a pin+map in their vmap
>> code. And as I mentioned before, this is actually wrong. The pattern
>> dates back to when the code was still in individual drivers. It's time
>> to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.
>>
>> Finally, there aren't that many users of vmap. A few drivers use it
>> while blitting framebuffers into HW buffers and ast does some permanent
>> mapping of the cursor BO. All this is trivial to turn into small pairs
>> of lock+vmap and vunmap+unlock.
>>
>> That leaves generic fbdev. The shadow buffer is also trivial to fix, as
>> outlined during this discussion.
>>
>> The code for fbdev in hardware buffers is a special case. It vmaps the
>> buffer during initialization and only vunmaps it during shutdown. As
>> this has worked so far without locking, I'd leave it as it is and put a
>> big comment next to is.

Please keep in mind that you only need to grab the lock if the buffer is 
not pinned otherwise.

In other words when we are scanning out from the BO it is guaranteed 
that it can't move around.

Maybe this makes the case here easier to handle.

>>
>> Hardware fbdev buffers is only required by few drivers; namely those
>> that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work.
>> We should consider to make the fbdev shadow buffer the default and have
>> drivers opt-in for the hardware buffer, if they need it.
>>
>>> And then document that if the callers of the _locked version wants a
>>> permanent mapping, it also needs to pin it. Plus I guess ideally implement
>>> the unlocked/permanent versions in terms of that, so that drivers only
>>> have to implement one or the other.
>> For my understanding, pinning is only done in prepare_fb code. And ast
>> pins its cursor BOs into vram. We should document to hols vmap/vunmap
>> only for time and cover them with resv locks. Pinning is for cases where
>> the hardware requires buffers in a special location, but does not
>> protect against concurrent threat. I think those are the implicit rules
>> already.
>>
>> I updated the radeon patchset, where all this appears to be working well.
> Hm yeah if you want to do the full change, then that works out too.
> It's just a pile of work.
>
> But if we can finish off with an dma_resv_assert_locked in
> dma_buf_vmap/vunmap, then I think that's ok. It does mean that
> exporters must implement vmap caching, or performance will be
> terrible. So quite some update for the dma-buf docs.

That's one possibility, but I think we should keep the ability to use 
pin+vmap instead of lock+vmap.

Regards,
Christian.

>
> But if you're willing to do all that conversion of callers, then of
> course I'm not stopping you. Not at all, it's great to see that kind
> of maze untangled.
> -Daniel
>
>> Best regards
>> Thomas
>>
>>> That should give us at least some way forward to gradually conver all the
>>> drivers and helpers over to dma_resv locking.
>>> -Daniel
>>>
>>>> The pin count is currently maintained by the vmap implementation in vram
>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an implicit
>>>> unpin. This prevents eviction in the damage worker. But now I was told than
>>>> pinning is only for BOs that are controlled by userspace and internal users
>>>> should acquire the resv lock. So vram helpers have to be fixed, actually.
>>>>
>>>> In vram helpers, unmapping does not mean eviction. The unmap operation only
>>>> marks the BO as unmappable. The real unmap happens when the eviction takes
>>>> place. This avoids many modifications to the page tables. IOW an unpinned,
>>>> unmapped BO will remain in VRAM until the memory is actually needed.
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>>> So I'm still not seeing how this can go boom.
>>>>>
>>>>> Now long term it'd be nice to cut everything over to dma_resv locking, but
>>>>> the issue there is that beyond ttm, none of the helpers (and few of the
>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>>>> interim fix seems like the right solution to me.
>>>>> -Daniel
>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>>>
>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>> lock could be
>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>> separate pair of
>>>>>>>>> DRM client functions could do the locking.
>>>>>>>> Given how this "do the right locking" is a can of worms (and I think
>>>>>>>> it's
>>>>>>>> worse than what you dug out already) I think the fb_helper.lock hack is
>>>>>>>> perfectly good enough.
>>>>>>>>
>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
>>>>>>>> code, while many helpers/drivers still have their hand-rolled locking,
>>>>>>>> will make conversion over to dma_resv needlessly more complicated.
>>>>>>>> -Daniel
>>>>>>>>
>>>>>>>>> Best regards
>>>>>>>>> Thomas
>>>>>>>>>
>>>>>>>>> [1] https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fcgit.freedesktop.org%2Fdrm%2Fdrm-tip%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%3Fid%3Dac60f3f3090115d21f028bffa2dcfb67f695c4f2%23n394&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C73458d36471547ca128008d891fb0958%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637419854682660550%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=Cky%2BozENU1nsd4hlfAdsvA6wC0RXsex7gpFuvHlCROM%3D&amp;reserved=0
>>>>>>>>>
>>>>>>>>>> Please note that the reservation lock you need to take here is part of
>>>>>>>>>> the GEM object.
>>>>>>>>>>
>>>>>>>>>> Usually we design things in the way that the code needs to take a lock
>>>>>>>>>> which protects an object, then do some operations with the object and
>>>>>>>>>> then release the lock again.
>>>>>>>>>>
>>>>>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>
>>>>>>>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>> talking about
>>>>>>>>>> here. We need to take that for a couple of different operations,
>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> Christian.
>>>>>>>>>>
>>>>>>>>>>> Best regards
>>>>>>>>>>> Thomas
>>>>>>>>>> _______________________________________________
>>>>>>>>>> dri-devel mailing list
>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C73458d36471547ca128008d891fb0958%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637419854682670543%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=Sa5ao1X5JGFgcnhNiDbCjI4SlMMWzHITBylAZsG%2BVzs%3D&amp;reserved=0
>>>>>>>>> --
>>>>>>>>> Thomas Zimmermann
>>>>>>>>> Graphics Driver Developer
>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>> --
>>>> Thomas Zimmermann
>>>> Graphics Driver Developer
>>>> SUSE Software Solutions Germany GmbH
>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>> (HRB 36809, AG Nürnberg)
>>>> Geschäftsführer: Felix Imendörffer
>>>
>>>
>>>
>>>
>>>
>> --
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
>
>
Thomas Zimmermann Nov. 26, 2020, 11:42 a.m. UTC | #25
Hi

Am 26.11.20 um 12:28 schrieb Christian König:
> Am 26.11.20 um 12:04 schrieb Daniel Vetter:
>> On Thu, Nov 26, 2020 at 11:15 AM Thomas Zimmermann 
>> <tzimmermann@suse.de> wrote:
>>> Hi
>>>
>>> Am 25.11.20 um 17:32 schrieb Daniel Vetter:
>>>> [...]
>>>> I guess full locking is required :-/ I'm not exactly sure how to 
>>>> make this
>>>> happen with the current plethora of helpers ... I think we need an
>>>> _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.
>>> I think we might be able to get away without new callbacks.
>>>
>>> I looked through the sources that implement and use vmap. All the
>>> implementations are without taking resv locks. For locking, we can wrap
>>> them in lock/unlock pairs. Having something like drm_gem_vmap_unlocked()
>>> that locks and vmaps should make this easy.
>>>
>>> In terms of implementation, only vram helpers do a pin+map in their vmap
>>> code. And as I mentioned before, this is actually wrong. The pattern
>>> dates back to when the code was still in individual drivers. It's time
>>> to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.
>>>
>>> Finally, there aren't that many users of vmap. A few drivers use it
>>> while blitting framebuffers into HW buffers and ast does some permanent
>>> mapping of the cursor BO. All this is trivial to turn into small pairs
>>> of lock+vmap and vunmap+unlock.
>>>
>>> That leaves generic fbdev. The shadow buffer is also trivial to fix, as
>>> outlined during this discussion.
>>>
>>> The code for fbdev in hardware buffers is a special case. It vmaps the
>>> buffer during initialization and only vunmaps it during shutdown. As
>>> this has worked so far without locking, I'd leave it as it is and put a
>>> big comment next to is.
> 
> Please keep in mind that you only need to grab the lock if the buffer is 
> not pinned otherwise.
> 
> In other words when we are scanning out from the BO it is guaranteed 
> that it can't move around.
> 
> Maybe this makes the case here easier to handle.

The fbdev code is already fragile. If no shadow FB is selected, the 
hardware BO is vmapped, but never pinned; if only for the reason that 
there's no useful generic interface to do this. So we cannot lock for 
longer periods, but it's also not pinned either. This really only work 
with a few drivers that use CMA helpers, where BOs don't move.

Best regards
Thomas

> 
>>>
>>> Hardware fbdev buffers is only required by few drivers; namely those
>>> that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work.
>>> We should consider to make the fbdev shadow buffer the default and have
>>> drivers opt-in for the hardware buffer, if they need it.
>>>
>>>> And then document that if the callers of the _locked version wants a
>>>> permanent mapping, it also needs to pin it. Plus I guess ideally 
>>>> implement
>>>> the unlocked/permanent versions in terms of that, so that drivers only
>>>> have to implement one or the other.
>>> For my understanding, pinning is only done in prepare_fb code. And ast
>>> pins its cursor BOs into vram. We should document to hols vmap/vunmap
>>> only for time and cover them with resv locks. Pinning is for cases where
>>> the hardware requires buffers in a special location, but does not
>>> protect against concurrent threat. I think those are the implicit rules
>>> already.
>>>
>>> I updated the radeon patchset, where all this appears to be working 
>>> well.
>> Hm yeah if you want to do the full change, then that works out too.
>> It's just a pile of work.
>>
>> But if we can finish off with an dma_resv_assert_locked in
>> dma_buf_vmap/vunmap, then I think that's ok. It does mean that
>> exporters must implement vmap caching, or performance will be
>> terrible. So quite some update for the dma-buf docs.
> 
> That's one possibility, but I think we should keep the ability to use 
> pin+vmap instead of lock+vmap.
> 
> Regards,
> Christian.
> 
>>
>> But if you're willing to do all that conversion of callers, then of
>> course I'm not stopping you. Not at all, it's great to see that kind
>> of maze untangled.
>> -Daniel
>>
>>> Best regards
>>> Thomas
>>>
>>>> That should give us at least some way forward to gradually conver 
>>>> all the
>>>> drivers and helpers over to dma_resv locking.
>>>> -Daniel
>>>>
>>>>> The pin count is currently maintained by the vmap implementation in 
>>>>> vram
>>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an 
>>>>> implicit
>>>>> unpin. This prevents eviction in the damage worker. But now I was 
>>>>> told than
>>>>> pinning is only for BOs that are controlled by userspace and 
>>>>> internal users
>>>>> should acquire the resv lock. So vram helpers have to be fixed, 
>>>>> actually.
>>>>>
>>>>> In vram helpers, unmapping does not mean eviction. The unmap 
>>>>> operation only
>>>>> marks the BO as unmappable. The real unmap happens when the 
>>>>> eviction takes
>>>>> place. This avoids many modifications to the page tables. IOW an 
>>>>> unpinned,
>>>>> unmapped BO will remain in VRAM until the memory is actually needed.
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>>> So I'm still not seeing how this can go boom.
>>>>>>
>>>>>> Now long term it'd be nice to cut everything over to dma_resv 
>>>>>> locking, but
>>>>>> the issue there is that beyond ttm, none of the helpers (and few 
>>>>>> of the
>>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>>>>> interim fix seems like the right solution to me.
>>>>>> -Daniel
>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>> Best regards
>>>>>>>> Thomas
>>>>>>>>
>>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>>> lock could be
>>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>>> separate pair of
>>>>>>>>>> DRM client functions could do the locking.
>>>>>>>>> Given how this "do the right locking" is a can of worms (and I 
>>>>>>>>> think
>>>>>>>>> it's
>>>>>>>>> worse than what you dug out already) I think the fb_helper.lock 
>>>>>>>>> hack is
>>>>>>>>> perfectly good enough.
>>>>>>>>>
>>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock in 
>>>>>>>>> generic
>>>>>>>>> code, while many helpers/drivers still have their hand-rolled 
>>>>>>>>> locking,
>>>>>>>>> will make conversion over to dma_resv needlessly more complicated.
>>>>>>>>> -Daniel
>>>>>>>>>
>>>>>>>>>> Best regards
>>>>>>>>>> Thomas
>>>>>>>>>>
>>>>>>>>>> [1] 
>>>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fcgit.freedesktop.org%2Fdrm%2Fdrm-tip%2Ftree%2Fdrivers%2Fgpu%2Fdrm%2Fdrm_fb_helper.c%3Fid%3Dac60f3f3090115d21f028bffa2dcfb67f695c4f2%23n394&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C73458d36471547ca128008d891fb0958%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637419854682660550%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=Cky%2BozENU1nsd4hlfAdsvA6wC0RXsex7gpFuvHlCROM%3D&amp;reserved=0 
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> Please note that the reservation lock you need to take here 
>>>>>>>>>>> is part of
>>>>>>>>>>> the GEM object.
>>>>>>>>>>>
>>>>>>>>>>> Usually we design things in the way that the code needs to 
>>>>>>>>>>> take a lock
>>>>>>>>>>> which protects an object, then do some operations with the 
>>>>>>>>>>> object and
>>>>>>>>>>> then release the lock again.
>>>>>>>>>>>
>>>>>>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>>
>>>>>>>>>>>> Sorry for the noob questions. I'm still trying to understand 
>>>>>>>>>>>> the
>>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>>> talking about
>>>>>>>>>>> here. We need to take that for a couple of different operations,
>>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Christian.
>>>>>>>>>>>
>>>>>>>>>>>> Best regards
>>>>>>>>>>>> Thomas
>>>>>>>>>>> _______________________________________________
>>>>>>>>>>> dri-devel mailing list
>>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C73458d36471547ca128008d891fb0958%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637419854682670543%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=Sa5ao1X5JGFgcnhNiDbCjI4SlMMWzHITBylAZsG%2BVzs%3D&amp;reserved=0 
>>>>>>>>>>>
>>>>>>>>>> -- 
>>>>>>>>>> Thomas Zimmermann
>>>>>>>>>> Graphics Driver Developer
>>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>> -- 
>>>>> Thomas Zimmermann
>>>>> Graphics Driver Developer
>>>>> SUSE Software Solutions Germany GmbH
>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>> (HRB 36809, AG Nürnberg)
>>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>>
>>>>
>>>>
>>>>
>>> -- 
>>> Thomas Zimmermann
>>> Graphics Driver Developer
>>> SUSE Software Solutions Germany GmbH
>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>> (HRB 36809, AG Nürnberg)
>>> Geschäftsführer: Felix Imendörffer
>>
>>
>
Thomas Zimmermann Nov. 26, 2020, 11:59 a.m. UTC | #26
Hi

Am 26.11.20 um 12:04 schrieb Daniel Vetter:
> On Thu, Nov 26, 2020 at 11:15 AM Thomas Zimmermann <tzimmermann@suse.de> wrote:
>>
>> Hi
>>
>> Am 25.11.20 um 17:32 schrieb Daniel Vetter:
>>> [...]
>>> I guess full locking is required :-/ I'm not exactly sure how to make this
>>> happen with the current plethora of helpers ... I think we need an
>>> _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.
>>
>> I think we might be able to get away without new callbacks.
>>
>> I looked through the sources that implement and use vmap. All the
>> implementations are without taking resv locks. For locking, we can wrap
>> them in lock/unlock pairs. Having something like drm_gem_vmap_unlocked()
>> that locks and vmaps should make this easy.
>>
>> In terms of implementation, only vram helpers do a pin+map in their vmap
>> code. And as I mentioned before, this is actually wrong. The pattern
>> dates back to when the code was still in individual drivers. It's time
>> to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.
>>
>> Finally, there aren't that many users of vmap. A few drivers use it
>> while blitting framebuffers into HW buffers and ast does some permanent
>> mapping of the cursor BO. All this is trivial to turn into small pairs
>> of lock+vmap and vunmap+unlock.
>>
>> That leaves generic fbdev. The shadow buffer is also trivial to fix, as
>> outlined during this discussion.
>>
>> The code for fbdev in hardware buffers is a special case. It vmaps the
>> buffer during initialization and only vunmaps it during shutdown. As
>> this has worked so far without locking, I'd leave it as it is and put a
>> big comment next to is.
>>
>> Hardware fbdev buffers is only required by few drivers; namely those
>> that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work.
>> We should consider to make the fbdev shadow buffer the default and have
>> drivers opt-in for the hardware buffer, if they need it.
>>
>>>
>>> And then document that if the callers of the _locked version wants a
>>> permanent mapping, it also needs to pin it. Plus I guess ideally implement
>>> the unlocked/permanent versions in terms of that, so that drivers only
>>> have to implement one or the other.
>>
>> For my understanding, pinning is only done in prepare_fb code. And ast
>> pins its cursor BOs into vram. We should document to hols vmap/vunmap
>> only for time and cover them with resv locks. Pinning is for cases where
>> the hardware requires buffers in a special location, but does not
>> protect against concurrent threat. I think those are the implicit rules
>> already.
>>
>> I updated the radeon patchset, where all this appears to be working well.
> 
> Hm yeah if you want to do the full change, then that works out too.
> It's just a pile of work.
> 
> But if we can finish off with an dma_resv_assert_locked in
> dma_buf_vmap/vunmap, then I think that's ok. It does mean that
> exporters must implement vmap caching, or performance will be
> terrible. So quite some update for the dma-buf docs.

Yeah, I remember a bug report about frequent page-table modifications 
wrt to vram helpers. So we implemented the lazy unmapping / vmap 
caching, as suggested by Christian back them. My guess is that anything 
TTM-based can use a similar pattern. Christian probably knows the corner 
cases.

CMA seems obviously working correctly already.

For SHMEM, I'd have to figure out the reference counting of the pages 
involved. My guess is that the vunmap in drm_gem_shmem_vunmap() could be 
moved into the free callback, plus a few other modifications.

Best regards
Thomas

> 
> But if you're willing to do all that conversion of callers, then of
> course I'm not stopping you. Not at all, it's great to see that kind
> of maze untangled.
> -Daniel
> 
>>
>> Best regards
>> Thomas
>>
>>>
>>> That should give us at least some way forward to gradually conver all the
>>> drivers and helpers over to dma_resv locking.
>>> -Daniel
>>>
>>>> The pin count is currently maintained by the vmap implementation in vram
>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an implicit
>>>> unpin. This prevents eviction in the damage worker. But now I was told than
>>>> pinning is only for BOs that are controlled by userspace and internal users
>>>> should acquire the resv lock. So vram helpers have to be fixed, actually.
>>>>
>>>> In vram helpers, unmapping does not mean eviction. The unmap operation only
>>>> marks the BO as unmappable. The real unmap happens when the eviction takes
>>>> place. This avoids many modifications to the page tables. IOW an unpinned,
>>>> unmapped BO will remain in VRAM until the memory is actually needed.
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>>>
>>>>> So I'm still not seeing how this can go boom.
>>>>>
>>>>> Now long term it'd be nice to cut everything over to dma_resv locking, but
>>>>> the issue there is that beyond ttm, none of the helpers (and few of the
>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>>>> interim fix seems like the right solution to me.
>>>>> -Daniel
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>>>
>>>>>>>>
>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>> lock could be
>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>> separate pair of
>>>>>>>>> DRM client functions could do the locking.
>>>>>>>>
>>>>>>>> Given how this "do the right locking" is a can of worms (and I think
>>>>>>>> it's
>>>>>>>> worse than what you dug out already) I think the fb_helper.lock hack is
>>>>>>>> perfectly good enough.
>>>>>>>>
>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock in generic
>>>>>>>> code, while many helpers/drivers still have their hand-rolled locking,
>>>>>>>> will make conversion over to dma_resv needlessly more complicated.
>>>>>>>> -Daniel
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Best regards
>>>>>>>>> Thomas
>>>>>>>>>
>>>>>>>>> [1] https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Please note that the reservation lock you need to take here is part of
>>>>>>>>>> the GEM object.
>>>>>>>>>>
>>>>>>>>>> Usually we design things in the way that the code needs to take a lock
>>>>>>>>>> which protects an object, then do some operations with the object and
>>>>>>>>>> then release the lock again.
>>>>>>>>>>
>>>>>>>>>> Having in the lock inside the operation can be done as well, but
>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>
>>>>>>>>>>> Sorry for the noob questions. I'm still trying to understand the
>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>>
>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>> talking about
>>>>>>>>>> here. We need to take that for a couple of different operations,
>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> Christian.
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Best regards
>>>>>>>>>>> Thomas
>>>>>>>>>>
>>>>>>>>>> _______________________________________________
>>>>>>>>>> dri-devel mailing list
>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> Thomas Zimmermann
>>>>>>>>> Graphics Driver Developer
>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>> --
>>>> Thomas Zimmermann
>>>> Graphics Driver Developer
>>>> SUSE Software Solutions Germany GmbH
>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>> (HRB 36809, AG Nürnberg)
>>>> Geschäftsführer: Felix Imendörffer
>>>
>>>
>>>
>>>
>>>
>>>
>>
>> --
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
> 
> 
>
Christian König Nov. 26, 2020, 12:08 p.m. UTC | #27
Am 26.11.20 um 12:59 schrieb Thomas Zimmermann:
> Hi
>
> Am 26.11.20 um 12:04 schrieb Daniel Vetter:
>> On Thu, Nov 26, 2020 at 11:15 AM Thomas Zimmermann 
>> <tzimmermann@suse.de> wrote:
>>>
>>> Hi
>>>
>>> Am 25.11.20 um 17:32 schrieb Daniel Vetter:
>>>> [...]
>>>> I guess full locking is required :-/ I'm not exactly sure how to 
>>>> make this
>>>> happen with the current plethora of helpers ... I think we need an
>>>> _locked version of vmap/vunmap callbacks in drm_gem_object_funcs.
>>>
>>> I think we might be able to get away without new callbacks.
>>>
>>> I looked through the sources that implement and use vmap. All the
>>> implementations are without taking resv locks. For locking, we can wrap
>>> them in lock/unlock pairs. Having something like 
>>> drm_gem_vmap_unlocked()
>>> that locks and vmaps should make this easy.
>>>
>>> In terms of implementation, only vram helpers do a pin+map in their 
>>> vmap
>>> code. And as I mentioned before, this is actually wrong. The pattern
>>> dates back to when the code was still in individual drivers. It's time
>>> to clean this up. Vram helpers can use drm_gem_ttm_vmap() instead.
>>>
>>> Finally, there aren't that many users of vmap. A few drivers use it
>>> while blitting framebuffers into HW buffers and ast does some permanent
>>> mapping of the cursor BO. All this is trivial to turn into small pairs
>>> of lock+vmap and vunmap+unlock.
>>>
>>> That leaves generic fbdev. The shadow buffer is also trivial to fix, as
>>> outlined during this discussion.
>>>
>>> The code for fbdev in hardware buffers is a special case. It vmaps the
>>> buffer during initialization and only vunmaps it during shutdown. As
>>> this has worked so far without locking, I'd leave it as it is and put a
>>> big comment next to is.
>>>
>>> Hardware fbdev buffers is only required by few drivers; namely those
>>> that require the CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM config option to work.
>>> We should consider to make the fbdev shadow buffer the default and have
>>> drivers opt-in for the hardware buffer, if they need it.
>>>
>>>>
>>>> And then document that if the callers of the _locked version wants a
>>>> permanent mapping, it also needs to pin it. Plus I guess ideally 
>>>> implement
>>>> the unlocked/permanent versions in terms of that, so that drivers only
>>>> have to implement one or the other.
>>>
>>> For my understanding, pinning is only done in prepare_fb code. And ast
>>> pins its cursor BOs into vram. We should document to hols vmap/vunmap
>>> only for time and cover them with resv locks. Pinning is for cases 
>>> where
>>> the hardware requires buffers in a special location, but does not
>>> protect against concurrent threat. I think those are the implicit rules
>>> already.
>>>
>>> I updated the radeon patchset, where all this appears to be working 
>>> well.
>>
>> Hm yeah if you want to do the full change, then that works out too.
>> It's just a pile of work.
>>
>> But if we can finish off with an dma_resv_assert_locked in
>> dma_buf_vmap/vunmap, then I think that's ok. It does mean that
>> exporters must implement vmap caching, or performance will be
>> terrible. So quite some update for the dma-buf docs.
>
> Yeah, I remember a bug report about frequent page-table modifications 
> wrt to vram helpers. So we implemented the lazy unmapping / vmap 
> caching, as suggested by Christian back them. My guess is that 
> anything TTM-based can use a similar pattern. Christian probably knows 
> the corner cases.

My memory is failing me, what corner case are you talking about?

Christian.

>
> CMA seems obviously working correctly already.
>
> For SHMEM, I'd have to figure out the reference counting of the pages 
> involved. My guess is that the vunmap in drm_gem_shmem_vunmap() could 
> be moved into the free callback, plus a few other modifications.
>
> Best regards
> Thomas
>
>>
>> But if you're willing to do all that conversion of callers, then of
>> course I'm not stopping you. Not at all, it's great to see that kind
>> of maze untangled.
>> -Daniel
>>
>>>
>>> Best regards
>>> Thomas
>>>
>>>>
>>>> That should give us at least some way forward to gradually conver 
>>>> all the
>>>> drivers and helpers over to dma_resv locking.
>>>> -Daniel
>>>>
>>>>> The pin count is currently maintained by the vmap implementation 
>>>>> in vram
>>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an 
>>>>> implicit
>>>>> unpin. This prevents eviction in the damage worker. But now I was 
>>>>> told than
>>>>> pinning is only for BOs that are controlled by userspace and 
>>>>> internal users
>>>>> should acquire the resv lock. So vram helpers have to be fixed, 
>>>>> actually.
>>>>>
>>>>> In vram helpers, unmapping does not mean eviction. The unmap 
>>>>> operation only
>>>>> marks the BO as unmappable. The real unmap happens when the 
>>>>> eviction takes
>>>>> place. This avoids many modifications to the page tables. IOW an 
>>>>> unpinned,
>>>>> unmapped BO will remain in VRAM until the memory is actually needed.
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>>>
>>>>>> So I'm still not seeing how this can go boom.
>>>>>>
>>>>>> Now long term it'd be nice to cut everything over to dma_resv 
>>>>>> locking, but
>>>>>> the issue there is that beyond ttm, none of the helpers (and few 
>>>>>> of the
>>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>>>>> interim fix seems like the right solution to me.
>>>>>> -Daniel
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Christian.
>>>>>>>
>>>>>>>>
>>>>>>>> Best regards
>>>>>>>> Thomas
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>>> lock could be
>>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>>> separate pair of
>>>>>>>>>> DRM client functions could do the locking.
>>>>>>>>>
>>>>>>>>> Given how this "do the right locking" is a can of worms (and I 
>>>>>>>>> think
>>>>>>>>> it's
>>>>>>>>> worse than what you dug out already) I think the 
>>>>>>>>> fb_helper.lock hack is
>>>>>>>>> perfectly good enough.
>>>>>>>>>
>>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock 
>>>>>>>>> in generic
>>>>>>>>> code, while many helpers/drivers still have their hand-rolled 
>>>>>>>>> locking,
>>>>>>>>> will make conversion over to dma_resv needlessly more 
>>>>>>>>> complicated.
>>>>>>>>> -Daniel
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Best regards
>>>>>>>>>> Thomas
>>>>>>>>>>
>>>>>>>>>> [1] 
>>>>>>>>>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Please note that the reservation lock you need to take here 
>>>>>>>>>>> is part of
>>>>>>>>>>> the GEM object.
>>>>>>>>>>>
>>>>>>>>>>> Usually we design things in the way that the code needs to 
>>>>>>>>>>> take a lock
>>>>>>>>>>> which protects an object, then do some operations with the 
>>>>>>>>>>> object and
>>>>>>>>>>> then release the lock again.
>>>>>>>>>>>
>>>>>>>>>>> Having in the lock inside the operation can be done as well, 
>>>>>>>>>>> but
>>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>>
>>>>>>>>>>>> Sorry for the noob questions. I'm still trying to 
>>>>>>>>>>>> understand the
>>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>>>
>>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>>> talking about
>>>>>>>>>>> here. We need to take that for a couple of different 
>>>>>>>>>>> operations,
>>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Christian.
>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Best regards
>>>>>>>>>>>> Thomas
>>>>>>>>>>>
>>>>>>>>>>> _______________________________________________
>>>>>>>>>>> dri-devel mailing list
>>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>>>>>
>>>>>>>>>> -- 
>>>>>>>>>> Thomas Zimmermann
>>>>>>>>>> Graphics Driver Developer
>>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>> -- 
>>>>> Thomas Zimmermann
>>>>> Graphics Driver Developer
>>>>> SUSE Software Solutions Germany GmbH
>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>> (HRB 36809, AG Nürnberg)
>>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>
>>> -- 
>>> Thomas Zimmermann
>>> Graphics Driver Developer
>>> SUSE Software Solutions Germany GmbH
>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>> (HRB 36809, AG Nürnberg)
>>> Geschäftsführer: Felix Imendörffer
>>
>>
>>
>
Thomas Zimmermann Nov. 26, 2020, 12:14 p.m. UTC | #28
Hi

Am 26.11.20 um 13:08 schrieb Christian König:
>> [...]
>> Yeah, I remember a bug report about frequent page-table modifications 
>> wrt to vram helpers. So we implemented the lazy unmapping / vmap 
>> caching, as suggested by Christian back them. My guess is that 
>> anything TTM-based can use a similar pattern. Christian probably knows 
>> the corner cases.
> 
> My memory is failing me, what corner case are you talking about?

Haha! :D What I meant was that if there were corner cases, you'd know 
about them. From your comment, I guess there are none.

Best regards
Thomas

> 
> Christian.
> 
>>
>> CMA seems obviously working correctly already.
>>
>> For SHMEM, I'd have to figure out the reference counting of the pages 
>> involved. My guess is that the vunmap in drm_gem_shmem_vunmap() could 
>> be moved into the free callback, plus a few other modifications.
>>
>> Best regards
>> Thomas
>>
>>>
>>> But if you're willing to do all that conversion of callers, then of
>>> course I'm not stopping you. Not at all, it's great to see that kind
>>> of maze untangled.
>>> -Daniel
>>>
>>>>
>>>> Best regards
>>>> Thomas
>>>>
>>>>>
>>>>> That should give us at least some way forward to gradually conver 
>>>>> all the
>>>>> drivers and helpers over to dma_resv locking.
>>>>> -Daniel
>>>>>
>>>>>> The pin count is currently maintained by the vmap implementation 
>>>>>> in vram
>>>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an 
>>>>>> implicit
>>>>>> unpin. This prevents eviction in the damage worker. But now I was 
>>>>>> told than
>>>>>> pinning is only for BOs that are controlled by userspace and 
>>>>>> internal users
>>>>>> should acquire the resv lock. So vram helpers have to be fixed, 
>>>>>> actually.
>>>>>>
>>>>>> In vram helpers, unmapping does not mean eviction. The unmap 
>>>>>> operation only
>>>>>> marks the BO as unmappable. The real unmap happens when the 
>>>>>> eviction takes
>>>>>> place. This avoids many modifications to the page tables. IOW an 
>>>>>> unpinned,
>>>>>> unmapped BO will remain in VRAM until the memory is actually needed.
>>>>>>
>>>>>> Best regards
>>>>>> Thomas
>>>>>>
>>>>>>>
>>>>>>> So I'm still not seeing how this can go boom.
>>>>>>>
>>>>>>> Now long term it'd be nice to cut everything over to dma_resv 
>>>>>>> locking, but
>>>>>>> the issue there is that beyond ttm, none of the helpers (and few 
>>>>>>> of the
>>>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. Quick
>>>>>>> interim fix seems like the right solution to me.
>>>>>>> -Daniel
>>>>>>>
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Best regards
>>>>>>>>> Thomas
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>>>> lock could be
>>>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>>>> separate pair of
>>>>>>>>>>> DRM client functions could do the locking.
>>>>>>>>>>
>>>>>>>>>> Given how this "do the right locking" is a can of worms (and I 
>>>>>>>>>> think
>>>>>>>>>> it's
>>>>>>>>>> worse than what you dug out already) I think the 
>>>>>>>>>> fb_helper.lock hack is
>>>>>>>>>> perfectly good enough.
>>>>>>>>>>
>>>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock 
>>>>>>>>>> in generic
>>>>>>>>>> code, while many helpers/drivers still have their hand-rolled 
>>>>>>>>>> locking,
>>>>>>>>>> will make conversion over to dma_resv needlessly more 
>>>>>>>>>> complicated.
>>>>>>>>>> -Daniel
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Best regards
>>>>>>>>>>> Thomas
>>>>>>>>>>>
>>>>>>>>>>> [1] 
>>>>>>>>>>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394 
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Please note that the reservation lock you need to take here 
>>>>>>>>>>>> is part of
>>>>>>>>>>>> the GEM object.
>>>>>>>>>>>>
>>>>>>>>>>>> Usually we design things in the way that the code needs to 
>>>>>>>>>>>> take a lock
>>>>>>>>>>>> which protects an object, then do some operations with the 
>>>>>>>>>>>> object and
>>>>>>>>>>>> then release the lock again.
>>>>>>>>>>>>
>>>>>>>>>>>> Having in the lock inside the operation can be done as well, 
>>>>>>>>>>>> but
>>>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>>>
>>>>>>>>>>>>> Sorry for the noob questions. I'm still trying to 
>>>>>>>>>>>>> understand the
>>>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>>>>
>>>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>>>> talking about
>>>>>>>>>>>> here. We need to take that for a couple of different 
>>>>>>>>>>>> operations,
>>>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>>>
>>>>>>>>>>>> Regards,
>>>>>>>>>>>> Christian.
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Best regards
>>>>>>>>>>>>> Thomas
>>>>>>>>>>>>
>>>>>>>>>>>> _______________________________________________
>>>>>>>>>>>> dri-devel mailing list
>>>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>>>>>>
>>>>>>>>>>> -- 
>>>>>>>>>>> Thomas Zimmermann
>>>>>>>>>>> Graphics Driver Developer
>>>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>> -- 
>>>>>> Thomas Zimmermann
>>>>>> Graphics Driver Developer
>>>>>> SUSE Software Solutions Germany GmbH
>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>> (HRB 36809, AG Nürnberg)
>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>
>>>> -- 
>>>> Thomas Zimmermann
>>>> Graphics Driver Developer
>>>> SUSE Software Solutions Germany GmbH
>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>> (HRB 36809, AG Nürnberg)
>>>> Geschäftsführer: Felix Imendörffer
>>>
>>>
>>>
>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Nov. 26, 2020, 12:16 p.m. UTC | #29
Am 26.11.20 um 13:14 schrieb Thomas Zimmermann:
> Hi
>
> Am 26.11.20 um 13:08 schrieb Christian König:
>>> [...]
>>> Yeah, I remember a bug report about frequent page-table 
>>> modifications wrt to vram helpers. So we implemented the lazy 
>>> unmapping / vmap caching, as suggested by Christian back them. My 
>>> guess is that anything TTM-based can use a similar pattern. 
>>> Christian probably knows the corner cases.
>>
>> My memory is failing me, what corner case are you talking about?
>
> Haha! :D What I meant was that if there were corner cases, you'd know 
> about them. From your comment, I guess there are none.

Ah, ok :) I was wondering for a moment if I have missed something.

Cheers,
Christian.

>
> Best regards
> Thomas
>
>>
>> Christian.
>>
>>>
>>> CMA seems obviously working correctly already.
>>>
>>> For SHMEM, I'd have to figure out the reference counting of the 
>>> pages involved. My guess is that the vunmap in 
>>> drm_gem_shmem_vunmap() could be moved into the free callback, plus a 
>>> few other modifications.
>>>
>>> Best regards
>>> Thomas
>>>
>>>>
>>>> But if you're willing to do all that conversion of callers, then of
>>>> course I'm not stopping you. Not at all, it's great to see that kind
>>>> of maze untangled.
>>>> -Daniel
>>>>
>>>>>
>>>>> Best regards
>>>>> Thomas
>>>>>
>>>>>>
>>>>>> That should give us at least some way forward to gradually conver 
>>>>>> all the
>>>>>> drivers and helpers over to dma_resv locking.
>>>>>> -Daniel
>>>>>>
>>>>>>> The pin count is currently maintained by the vmap implementation 
>>>>>>> in vram
>>>>>>> helpers. Calling vmap is an implicit pin; calling vunmap is an 
>>>>>>> implicit
>>>>>>> unpin. This prevents eviction in the damage worker. But now I 
>>>>>>> was told than
>>>>>>> pinning is only for BOs that are controlled by userspace and 
>>>>>>> internal users
>>>>>>> should acquire the resv lock. So vram helpers have to be fixed, 
>>>>>>> actually.
>>>>>>>
>>>>>>> In vram helpers, unmapping does not mean eviction. The unmap 
>>>>>>> operation only
>>>>>>> marks the BO as unmappable. The real unmap happens when the 
>>>>>>> eviction takes
>>>>>>> place. This avoids many modifications to the page tables. IOW an 
>>>>>>> unpinned,
>>>>>>> unmapped BO will remain in VRAM until the memory is actually 
>>>>>>> needed.
>>>>>>>
>>>>>>> Best regards
>>>>>>> Thomas
>>>>>>>
>>>>>>>>
>>>>>>>> So I'm still not seeing how this can go boom.
>>>>>>>>
>>>>>>>> Now long term it'd be nice to cut everything over to dma_resv 
>>>>>>>> locking, but
>>>>>>>> the issue there is that beyond ttm, none of the helpers (and 
>>>>>>>> few of the
>>>>>>>> drivers) use dma_resv. So this is a fairly big uphill battle. 
>>>>>>>> Quick
>>>>>>>> interim fix seems like the right solution to me.
>>>>>>>> -Daniel
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Best regards
>>>>>>>>>> Thomas
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> There's no recursion taking place, so I guess the reservation
>>>>>>>>>>>> lock could be
>>>>>>>>>>>> acquired/release in drm_client_buffer_vmap/vunmap(), or a
>>>>>>>>>>>> separate pair of
>>>>>>>>>>>> DRM client functions could do the locking.
>>>>>>>>>>>
>>>>>>>>>>> Given how this "do the right locking" is a can of worms (and 
>>>>>>>>>>> I think
>>>>>>>>>>> it's
>>>>>>>>>>> worse than what you dug out already) I think the 
>>>>>>>>>>> fb_helper.lock hack is
>>>>>>>>>>> perfectly good enough.
>>>>>>>>>>>
>>>>>>>>>>> I'm also somewhat worried that starting to use dma_resv lock 
>>>>>>>>>>> in generic
>>>>>>>>>>> code, while many helpers/drivers still have their 
>>>>>>>>>>> hand-rolled locking,
>>>>>>>>>>> will make conversion over to dma_resv needlessly more 
>>>>>>>>>>> complicated.
>>>>>>>>>>> -Daniel
>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Best regards
>>>>>>>>>>>> Thomas
>>>>>>>>>>>>
>>>>>>>>>>>> [1] 
>>>>>>>>>>>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/drm_fb_helper.c?id=ac60f3f3090115d21f028bffa2dcfb67f695c4f2#n394 
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Please note that the reservation lock you need to take 
>>>>>>>>>>>>> here is part of
>>>>>>>>>>>>> the GEM object.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Usually we design things in the way that the code needs to 
>>>>>>>>>>>>> take a lock
>>>>>>>>>>>>> which protects an object, then do some operations with the 
>>>>>>>>>>>>> object and
>>>>>>>>>>>>> then release the lock again.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Having in the lock inside the operation can be done as 
>>>>>>>>>>>>> well, but
>>>>>>>>>>>>> returning with it is kind of unusual design.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> Sorry for the noob questions. I'm still trying to 
>>>>>>>>>>>>>> understand the
>>>>>>>>>>>>>> implications of acquiring these locks.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Well this is the reservation lock of the GEM object we are
>>>>>>>>>>>>> talking about
>>>>>>>>>>>>> here. We need to take that for a couple of different 
>>>>>>>>>>>>> operations,
>>>>>>>>>>>>> vmap/vunmap doesn't sound like a special case to me.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>> Christian.
>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Best regards
>>>>>>>>>>>>>> Thomas
>>>>>>>>>>>>>
>>>>>>>>>>>>> _______________________________________________
>>>>>>>>>>>>> dri-devel mailing list
>>>>>>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>>>>>>>
>>>>>>>>>>>> -- 
>>>>>>>>>>>> Thomas Zimmermann
>>>>>>>>>>>> Graphics Driver Developer
>>>>>>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>> -- 
>>>>>>> Thomas Zimmermann
>>>>>>> Graphics Driver Developer
>>>>>>> SUSE Software Solutions Germany GmbH
>>>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>>>> (HRB 36809, AG Nürnberg)
>>>>>>> Geschäftsführer: Felix Imendörffer
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>>> -- 
>>>>> Thomas Zimmermann
>>>>> Graphics Driver Developer
>>>>> SUSE Software Solutions Germany GmbH
>>>>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>>>>> (HRB 36809, AG Nürnberg)
>>>>> Geschäftsführer: Felix Imendörffer
>>>>
>>>>
>>>>
>>>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index d2876ce3bc9e..eaf7fc9a7b07 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -226,6 +226,53 @@  static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r)
 	return r;
 }
 
+static int radeon_gem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+{
+	static const uint32_t any_domain = RADEON_GEM_DOMAIN_VRAM |
+					   RADEON_GEM_DOMAIN_GTT |
+					   RADEON_GEM_DOMAIN_CPU;
+
+	struct radeon_bo *bo = gem_to_radeon_bo(obj);
+	int ret;
+
+	ret = radeon_bo_reserve(bo, false);
+	if (ret)
+		return ret;
+
+	/* pin buffer at its current location */
+	ret = radeon_bo_pin(bo, any_domain, NULL);
+	if (ret)
+		goto err_radeon_bo_unreserve;
+
+	ret = drm_gem_ttm_vmap(obj, map);
+	if (ret)
+		goto err_radeon_bo_unpin;
+
+	radeon_bo_unreserve(bo);
+
+	return 0;
+
+err_radeon_bo_unpin:
+	radeon_bo_unpin(bo);
+err_radeon_bo_unreserve:
+	radeon_bo_unreserve(bo);
+	return ret;
+}
+
+static void radeon_gem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+{
+	struct radeon_bo *bo = gem_to_radeon_bo(obj);
+	int ret;
+
+	ret = radeon_bo_reserve(bo, false);
+	if (ret)
+		return;
+
+	drm_gem_ttm_vunmap(obj, map);
+	radeon_bo_unpin(bo);
+	radeon_bo_unreserve(bo);
+}
+
 static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
 	.free = radeon_gem_object_free,
 	.open = radeon_gem_object_open,
@@ -234,8 +281,8 @@  static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
 	.pin = radeon_gem_prime_pin,
 	.unpin = radeon_gem_prime_unpin,
 	.get_sg_table = radeon_gem_prime_get_sg_table,
-	.vmap = drm_gem_ttm_vmap,
-	.vunmap = drm_gem_ttm_vunmap,
+	.vmap = radeon_gem_object_vmap,
+	.vunmap = radeon_gem_object_vunmap,
 };
 
 /*