diff mbox series

[v9,2/3] drm/amdgpu: Enable clear page functionality

Message ID 20240318214058.2014-2-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Headers show
Series [v9,1/3] drm/buddy: Implement tracking clear page feature | expand

Commit Message

Paneer Selvam, Arunpravin March 18, 2024, 9:40 p.m. UTC
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
    in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
    clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
    amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared(&cursor) just above the size
    calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
    in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
    AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
    the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared(&cursor) check.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

Comments

Christian König March 19, 2024, 10:28 a.m. UTC | #1
Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:
> Add clear page support in vram memory region.
>
> v1(Christian):
>    - Dont handle clear page as TTM flag since when moving the BO back
>      in from GTT again we don't need that.
>    - Make a specialized version of amdgpu_fill_buffer() which only
>      clears the VRAM areas which are not already cleared
>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>      amdgpu_object.c
>
> v2:
>    - Modify the function name amdgpu_ttm_* (Alex)
>    - Drop the delayed parameter (Christian)
>    - handle amdgpu_res_cleared(&cursor) just above the size
>      calculation (Christian)
>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>      in the free path to properly wait for fences etc.. (Christian)
>
> v3(Christian):
>    - Remove buffer clear code in VRAM manager instead change the
>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>      the DRM_BUDDY_CLEARED flag.
>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Acked-by: Felix Kuehling <felix.kuehling@amd.com>

Just a few nit picks below, but in general already looks good to me.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>   6 files changed, 111 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 8bc79924d171..c92d92b28a57 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -39,6 +39,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu_vram_mgr.h"
>   
>   /**
>    * DOC: amdgpu_object
> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   	if (!amdgpu_bo_support_uswc(bo->flags))
>   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>   
> -	if (adev->ras_enabled)
> -		bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> +	bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>   
>   	bo->tbo.bdev = &adev->mman.bdev;
>   	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   
>   	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>   	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> -		struct dma_fence *fence;
> +		struct dma_fence *fence = NULL;
>   
> -		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> +		r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>   		if (unlikely(r))
>   			goto fail_unreserve;
>   
> -		dma_resv_add_fence(bo->tbo.base.resv, fence,
> -				   DMA_RESV_USAGE_KERNEL);
> -		dma_fence_put(fence);
> +		if (fence) {
> +			dma_resv_add_fence(bo->tbo.base.resv, fence,
> +					   DMA_RESV_USAGE_KERNEL);
> +			dma_fence_put(fence);
> +		}
>   	}
>   	if (!bp->resv)
>   		amdgpu_bo_unreserve(bo);
> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>   	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>   		return;
>   
> -	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> +	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>   	if (!WARN_ON(r)) {
> +		struct amdgpu_vram_mgr_resource *vres;
> +
> +		vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +		vres->flags |= DRM_BUDDY_CLEARED;

Those lines should probably be in the VRAM manager.

>   		amdgpu_bo_fence(abo, fence, false);
>   		dma_fence_put(fence);
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 381101d2bf05..50fcd86e1033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>   	}
>   }
>   
> +/**
> + * amdgpu_res_cleared - check if blocks are cleared
> + *
> + * @cur: the cursor to extract the block
> + *
> + * Check if the @cur block is cleared
> + */
> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> +{
> +	struct drm_buddy_block *block;
> +
> +	switch (cur->mem_type) {
> +	case TTM_PL_VRAM:
> +		block = cur->node;
> +
> +		if (!amdgpu_vram_mgr_is_cleared(block))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8722beba494e..bcbffe909b47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>   	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>   		struct dma_fence *wipe_fence = NULL;
>   
> -		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> -					false);
> +		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> +				       false);
>   		if (r) {
>   			goto error;
>   		} else if (wipe_fence) {
> +			struct amdgpu_vram_mgr_resource *vres;
> +
> +			vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +			vres->flags |= DRM_BUDDY_CLEARED;
>   			dma_fence_put(fence);
>   			fence = wipe_fence;
>   		}
> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>   	return 0;
>   }
>   

Some kerneldoc here what the function does would ne nice to have.

> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +			    struct dma_resv *resv,
> +			    struct dma_fence **fence)
> +{
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	struct amdgpu_res_cursor cursor;
> +	struct dma_fence *f = NULL;

It might be cleaner to just use the stub fence here (see 
dma_fence_get_stub()).

This would avoid to local variable init in the caller and the if which 
checks if the function returned a fence or not.

> +	u64 addr;
> +	int r;
> +
> +	if (!adev->mman.buffer_funcs_enabled)
> +		return -EINVAL;
> +
> +	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> +
> +	mutex_lock(&adev->mman.gtt_window_lock);
> +	while (cursor.remaining) {
> +		struct dma_fence *next = NULL;
> +		u64 size;
> +
> +		if (amdgpu_res_cleared(&cursor)) {
> +			amdgpu_res_next(&cursor, cursor.size);
> +			continue;
> +		}
> +
> +		/* Never clear more than 256MiB at once to avoid timeouts */
> +		size = min(cursor.size, 256ULL << 20);
> +
> +		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +					  1, ring, false, &size, &addr);
> +		if (r)
> +			goto err;
> +
> +		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> +					&next, true, true);
> +		if (r)
> +			goto err;
> +
> +		dma_fence_put(f);
> +		f = next;
> +
> +		amdgpu_res_next(&cursor, size);
> +	}
> +err:
> +	mutex_unlock(&adev->mman.gtt_window_lock);
> +	if (fence)

Just make fence a mandatory parameter and drop the if and the get/put dance.

> +		*fence = dma_fence_get(f);
> +	dma_fence_put(f);
> +
> +	return r;
> +}
> +
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 65ec82141a8e..b404d89d52e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -38,8 +38,6 @@
>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
>   
> -#define AMDGPU_POISON	0xd0bed0be
> -
>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>   
> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   			       uint64_t size, bool tmz,
>   			       struct dma_resv *resv,
>   			       struct dma_fence **f);
> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +			    struct dma_resv *resv,
> +			    struct dma_fence **fence);
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index c0c851409241..e494f5bf136a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   {
>   	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>   	struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>   	u64 vis_usage = 0, max_bytes, min_block_size;
>   	struct amdgpu_vram_mgr_resource *vres;
>   	u64 size, remaining_size, lpfn, fpfn;
> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>   		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>   
> +	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> +		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +

Mhm, you should probably *not* store this flags in the vres structure.

As soon as the BO is used the VRAM wouldn't be cleared any more.

Regards,
Christian.

>   	if (fpfn || lpfn != mgr->mm.size)
>   		/* Allocate blocks in desired range */
>   		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks, 0);
> +	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> index 0e04e42cf809..8478522d7366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>   	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>   }
>   
> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> +{
> +	return drm_buddy_block_is_clear(block);
> +}
> +
>   static inline struct amdgpu_vram_mgr_resource *
>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>   {
Paneer Selvam, Arunpravin March 19, 2024, 11:41 a.m. UTC | #2
Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:
>
>
> Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:
>> Add clear page support in vram memory region.
>>
>> v1(Christian):
>>    - Dont handle clear page as TTM flag since when moving the BO back
>>      in from GTT again we don't need that.
>>    - Make a specialized version of amdgpu_fill_buffer() which only
>>      clears the VRAM areas which are not already cleared
>>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>      amdgpu_object.c
>>
>> v2:
>>    - Modify the function name amdgpu_ttm_* (Alex)
>>    - Drop the delayed parameter (Christian)
>>    - handle amdgpu_res_cleared(&cursor) just above the size
>>      calculation (Christian)
>>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>>      in the free path to properly wait for fences etc.. (Christian)
>>
>> v3(Christian):
>>    - Remove buffer clear code in VRAM manager instead change the
>>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>      the DRM_BUDDY_CLEARED flag.
>>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>
> Just a few nit picks below, but in general already looks good to me.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>   6 files changed, 111 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 8bc79924d171..c92d92b28a57 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -39,6 +39,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_trace.h"
>>   #include "amdgpu_amdkfd.h"
>> +#include "amdgpu_vram_mgr.h"
>>     /**
>>    * DOC: amdgpu_object
>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>       if (!amdgpu_bo_support_uswc(bo->flags))
>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>   -    if (adev->ras_enabled)
>> -        bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>> +    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>         bo->tbo.bdev = &adev->mman.bdev;
>>       if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>         if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>           bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>> -        struct dma_fence *fence;
>> +        struct dma_fence *fence = NULL;
>>   -        r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, 
>> true);
>> +        r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>           if (unlikely(r))
>>               goto fail_unreserve;
>>   -        dma_resv_add_fence(bo->tbo.base.resv, fence,
>> -                   DMA_RESV_USAGE_KERNEL);
>> -        dma_fence_put(fence);
>> +        if (fence) {
>> +            dma_resv_add_fence(bo->tbo.base.resv, fence,
>> +                       DMA_RESV_USAGE_KERNEL);
>> +            dma_fence_put(fence);
>> +        }
>>       }
>>       if (!bp->resv)
>>           amdgpu_bo_unreserve(bo);
>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
>> ttm_buffer_object *bo)
>>       if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>           return;
>>   -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
>> &fence, true);
>> +    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>       if (!WARN_ON(r)) {
>> +        struct amdgpu_vram_mgr_resource *vres;
>> +
>> +        vres = to_amdgpu_vram_mgr_resource(bo->resource);
>> +        vres->flags |= DRM_BUDDY_CLEARED;
>
> Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() function return 
value, can we move the amdgpu_fill_buffer() function call and
DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
manager and does it require to wipe the VRAM buffers here as well
without setting the DRM_BUDDY_CLEARED.
>
>>           amdgpu_bo_fence(abo, fence, false);
>>           dma_fence_put(fence);
>>       }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> index 381101d2bf05..50fcd86e1033 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
>> amdgpu_res_cursor *cur, uint64_t size)
>>       }
>>   }
>>   +/**
>> + * amdgpu_res_cleared - check if blocks are cleared
>> + *
>> + * @cur: the cursor to extract the block
>> + *
>> + * Check if the @cur block is cleared
>> + */
>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>> +{
>> +    struct drm_buddy_block *block;
>> +
>> +    switch (cur->mem_type) {
>> +    case TTM_PL_VRAM:
>> +        block = cur->node;
>> +
>> +        if (!amdgpu_vram_mgr_is_cleared(block))
>> +            return false;
>> +        break;
>> +    default:
>> +        return false;
>> +    }
>> +
>> +    return true;
>> +}
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 8722beba494e..bcbffe909b47 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct 
>> ttm_buffer_object *bo,
>>           (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>           struct dma_fence *wipe_fence = NULL;
>>   -        r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
>> -                    false);
>> +        r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>> +                       false);
>>           if (r) {
>>               goto error;
>>           } else if (wipe_fence) {
>> +            struct amdgpu_vram_mgr_resource *vres;
>> +
>> +            vres = to_amdgpu_vram_mgr_resource(bo->resource);
>> +            vres->flags |= DRM_BUDDY_CLEARED;
Does it require to set the DRM_BUDDY_CLEARED flag as we are not freeing 
the VRAM buffers?
>>               dma_fence_put(fence);
>>               fence = wipe_fence;
>>           }
>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct 
>> amdgpu_ring *ring, uint32_t src_data,
>>       return 0;
>>   }
>
> Some kerneldoc here what the function does would ne nice to have.
>
>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> +                struct dma_resv *resv,
>> +                struct dma_fence **fence)
>> +{
>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> +    struct amdgpu_res_cursor cursor;
>> +    struct dma_fence *f = NULL;
>
> It might be cleaner to just use the stub fence here (see 
> dma_fence_get_stub()).
>
> This would avoid to local variable init in the caller and the if which 
> checks if the function returned a fence or not.
>
>> +    u64 addr;
>> +    int r;
>> +
>> +    if (!adev->mman.buffer_funcs_enabled)
>> +        return -EINVAL;
>> +
>> +    amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
>> +
>> +    mutex_lock(&adev->mman.gtt_window_lock);
>> +    while (cursor.remaining) {
>> +        struct dma_fence *next = NULL;
>> +        u64 size;
>> +
>> +        if (amdgpu_res_cleared(&cursor)) {
>> +            amdgpu_res_next(&cursor, cursor.size);
>> +            continue;
>> +        }
>> +
>> +        /* Never clear more than 256MiB at once to avoid timeouts */
>> +        size = min(cursor.size, 256ULL << 20);
>> +
>> +        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>> +                      1, ring, false, &size, &addr);
>> +        if (r)
>> +            goto err;
>> +
>> +        r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>> +                    &next, true, true);
>> +        if (r)
>> +            goto err;
>> +
>> +        dma_fence_put(f);
>> +        f = next;
>> +
>> +        amdgpu_res_next(&cursor, size);
>> +    }
>> +err:
>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>> +    if (fence)
>
> Just make fence a mandatory parameter and drop the if and the get/put 
> dance.
>
>> +        *fence = dma_fence_get(f);
>> +    dma_fence_put(f);
>> +
>> +    return r;
>> +}
>> +
>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>               uint32_t src_data,
>>               struct dma_resv *resv,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 65ec82141a8e..b404d89d52e5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -38,8 +38,6 @@
>>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
>>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2
>>   -#define AMDGPU_POISON    0xd0bed0be
>> -
>>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>   @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct 
>> amdgpu_device *adev,
>>                      uint64_t size, bool tmz,
>>                      struct dma_resv *resv,
>>                      struct dma_fence **f);
>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> +                struct dma_resv *resv,
>> +                struct dma_fence **fence);
>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>               uint32_t src_data,
>>               struct dma_resv *resv,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index c0c851409241..e494f5bf136a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>   {
>>       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>       struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> +    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>       u64 vis_usage = 0, max_bytes, min_block_size;
>>       struct amdgpu_vram_mgr_resource *vres;
>>       u64 size, remaining_size, lpfn, fpfn;
>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>           vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>   +    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>> +        vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>> +
>
> Mhm, you should probably *not* store this flags in the vres structure.
>
> As soon as the BO is used the VRAM wouldn't be cleared any more.
>
> Regards,
> Christian.
>
>>       if (fpfn || lpfn != mgr->mm.size)
>>           /* Allocate blocks in desired range */
>>           vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct 
>> ttm_resource_manager *man,
>>         amdgpu_vram_mgr_do_reserve(man);
>>   -    drm_buddy_free_list(mm, &vres->blocks, 0);
>> +    drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>       mutex_unlock(&mgr->lock);
>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> index 0e04e42cf809..8478522d7366 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> @@ -53,6 +53,11 @@ static inline u64 
>> amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>       return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>   }
>>   +static inline bool amdgpu_vram_mgr_is_cleared(struct 
>> drm_buddy_block *block)
>> +{
>> +    return drm_buddy_block_is_clear(block);
>> +}
>> +
>>   static inline struct amdgpu_vram_mgr_resource *
>>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>   {
>
Christian König March 19, 2024, 1:47 p.m. UTC | #3
Am 19.03.24 um 12:41 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
> On 3/19/2024 3:58 PM, Christian König wrote:
>>
>>
>> Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:
>>> Add clear page support in vram memory region.
>>>
>>> v1(Christian):
>>>    - Dont handle clear page as TTM flag since when moving the BO back
>>>      in from GTT again we don't need that.
>>>    - Make a specialized version of amdgpu_fill_buffer() which only
>>>      clears the VRAM areas which are not already cleared
>>>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>>      amdgpu_object.c
>>>
>>> v2:
>>>    - Modify the function name amdgpu_ttm_* (Alex)
>>>    - Drop the delayed parameter (Christian)
>>>    - handle amdgpu_res_cleared(&cursor) just above the size
>>>      calculation (Christian)
>>>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the 
>>> buffers
>>>      in the free path to properly wait for fences etc.. (Christian)
>>>
>>> v3(Christian):
>>>    - Remove buffer clear code in VRAM manager instead change the
>>>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>>      the DRM_BUDDY_CLEARED flag.
>>>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>>
>> Just a few nit picks below, but in general already looks good to me.
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 
>>> ++++++++++++++++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>>   6 files changed, 111 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 8bc79924d171..c92d92b28a57 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -39,6 +39,7 @@
>>>   #include "amdgpu.h"
>>>   #include "amdgpu_trace.h"
>>>   #include "amdgpu_amdkfd.h"
>>> +#include "amdgpu_vram_mgr.h"
>>>     /**
>>>    * DOC: amdgpu_object
>>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>       if (!amdgpu_bo_support_uswc(bo->flags))
>>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>>   -    if (adev->ras_enabled)
>>> -        bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>> +    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>         bo->tbo.bdev = &adev->mman.bdev;
>>>       if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>         if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>>           bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>>> -        struct dma_fence *fence;
>>> +        struct dma_fence *fence = NULL;
>>>   -        r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, 
>>> true);
>>> +        r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>>           if (unlikely(r))
>>>               goto fail_unreserve;
>>>   -        dma_resv_add_fence(bo->tbo.base.resv, fence,
>>> -                   DMA_RESV_USAGE_KERNEL);
>>> -        dma_fence_put(fence);
>>> +        if (fence) {
>>> +            dma_resv_add_fence(bo->tbo.base.resv, fence,
>>> +                       DMA_RESV_USAGE_KERNEL);
>>> +            dma_fence_put(fence);
>>> +        }
>>>       }
>>>       if (!bp->resv)
>>>           amdgpu_bo_unreserve(bo);
>>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
>>> ttm_buffer_object *bo)
>>>       if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>>           return;
>>>   -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
>>> &fence, true);
>>> +    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>>       if (!WARN_ON(r)) {
>>> +        struct amdgpu_vram_mgr_resource *vres;
>>> +
>>> +        vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>> +        vres->flags |= DRM_BUDDY_CLEARED;
>>
>> Those lines should probably be in the VRAM manager.
> This flag is set based on the amdgpu_fill_buffer() function return 
> value, can we move the amdgpu_fill_buffer() function call and
> DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
> manager and does it require to wipe the VRAM buffers here as well
> without setting the DRM_BUDDY_CLEARED.

No that won't work. The call to amdgpu_fill_buffer() *must* be here 
because that here is called before the resource is actually freed.

Only setting the vres->flags should probably be moved into 
amdgpu_vram_mgr.c.

So instead of calling that manually here just make a function for it to 
keep the code related to the buddy allocator in one place.

Regards,
Christian.

>>
>>>           amdgpu_bo_fence(abo, fence, false);
>>>           dma_fence_put(fence);
>>>       }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>> index 381101d2bf05..50fcd86e1033 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
>>> amdgpu_res_cursor *cur, uint64_t size)
>>>       }
>>>   }
>>>   +/**
>>> + * amdgpu_res_cleared - check if blocks are cleared
>>> + *
>>> + * @cur: the cursor to extract the block
>>> + *
>>> + * Check if the @cur block is cleared
>>> + */
>>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>>> +{
>>> +    struct drm_buddy_block *block;
>>> +
>>> +    switch (cur->mem_type) {
>>> +    case TTM_PL_VRAM:
>>> +        block = cur->node;
>>> +
>>> +        if (!amdgpu_vram_mgr_is_cleared(block))
>>> +            return false;
>>> +        break;
>>> +    default:
>>> +        return false;
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>>   #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 8722beba494e..bcbffe909b47 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct 
>>> ttm_buffer_object *bo,
>>>           (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>>           struct dma_fence *wipe_fence = NULL;
>>>   -        r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, 
>>> &wipe_fence,
>>> -                    false);
>>> +        r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>>> +                       false);
>>>           if (r) {
>>>               goto error;
>>>           } else if (wipe_fence) {
>>> +            struct amdgpu_vram_mgr_resource *vres;
>>> +
>>> +            vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>> +            vres->flags |= DRM_BUDDY_CLEARED;
> Does it require to set the DRM_BUDDY_CLEARED flag as we are not 
> freeing the VRAM buffers?
>>>               dma_fence_put(fence);
>>>               fence = wipe_fence;
>>>           }
>>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct 
>>> amdgpu_ring *ring, uint32_t src_data,
>>>       return 0;
>>>   }
>>
>> Some kerneldoc here what the function does would ne nice to have.
>>
>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>> +                struct dma_resv *resv,
>>> +                struct dma_fence **fence)
>>> +{
>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> +    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>> +    struct amdgpu_res_cursor cursor;
>>> +    struct dma_fence *f = NULL;
>>
>> It might be cleaner to just use the stub fence here (see 
>> dma_fence_get_stub()).
>>
>> This would avoid to local variable init in the caller and the if 
>> which checks if the function returned a fence or not.
>>
>>> +    u64 addr;
>>> +    int r;
>>> +
>>> +    if (!adev->mman.buffer_funcs_enabled)
>>> +        return -EINVAL;
>>> +
>>> +    amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), 
>>> &cursor);
>>> +
>>> +    mutex_lock(&adev->mman.gtt_window_lock);
>>> +    while (cursor.remaining) {
>>> +        struct dma_fence *next = NULL;
>>> +        u64 size;
>>> +
>>> +        if (amdgpu_res_cleared(&cursor)) {
>>> +            amdgpu_res_next(&cursor, cursor.size);
>>> +            continue;
>>> +        }
>>> +
>>> +        /* Never clear more than 256MiB at once to avoid timeouts */
>>> +        size = min(cursor.size, 256ULL << 20);
>>> +
>>> +        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>>> +                      1, ring, false, &size, &addr);
>>> +        if (r)
>>> +            goto err;
>>> +
>>> +        r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>>> +                    &next, true, true);
>>> +        if (r)
>>> +            goto err;
>>> +
>>> +        dma_fence_put(f);
>>> +        f = next;
>>> +
>>> +        amdgpu_res_next(&cursor, size);
>>> +    }
>>> +err:
>>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>>> +    if (fence)
>>
>> Just make fence a mandatory parameter and drop the if and the get/put 
>> dance.
>>
>>> +        *fence = dma_fence_get(f);
>>> +    dma_fence_put(f);
>>> +
>>> +    return r;
>>> +}
>>> +
>>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>               uint32_t src_data,
>>>               struct dma_resv *resv,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index 65ec82141a8e..b404d89d52e5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -38,8 +38,6 @@
>>>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
>>>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2
>>>   -#define AMDGPU_POISON    0xd0bed0be
>>> -
>>>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>   @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct 
>>> amdgpu_device *adev,
>>>                      uint64_t size, bool tmz,
>>>                      struct dma_resv *resv,
>>>                      struct dma_fence **f);
>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>> +                struct dma_resv *resv,
>>> +                struct dma_fence **fence);
>>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>               uint32_t src_data,
>>>               struct dma_resv *resv,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index c0c851409241..e494f5bf136a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct 
>>> ttm_resource_manager *man,
>>>   {
>>>       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>       struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>> +    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>>       u64 vis_usage = 0, max_bytes, min_block_size;
>>>       struct amdgpu_vram_mgr_resource *vres;
>>>       u64 size, remaining_size, lpfn, fpfn;
>>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct 
>>> ttm_resource_manager *man,
>>>       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>           vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>>   +    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>>> +        vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>>> +
>>
>> Mhm, you should probably *not* store this flags in the vres structure.
>>
>> As soon as the BO is used the VRAM wouldn't be cleared any more.
>>
>> Regards,
>> Christian.
>>
>>>       if (fpfn || lpfn != mgr->mm.size)
>>>           /* Allocate blocks in desired range */
>>>           vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct 
>>> ttm_resource_manager *man,
>>>         amdgpu_vram_mgr_do_reserve(man);
>>>   -    drm_buddy_free_list(mm, &vres->blocks, 0);
>>> +    drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>>       mutex_unlock(&mgr->lock);
>>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>> index 0e04e42cf809..8478522d7366 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>> @@ -53,6 +53,11 @@ static inline u64 
>>> amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>>       return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>>   }
>>>   +static inline bool amdgpu_vram_mgr_is_cleared(struct 
>>> drm_buddy_block *block)
>>> +{
>>> +    return drm_buddy_block_is_clear(block);
>>> +}
>>> +
>>>   static inline struct amdgpu_vram_mgr_resource *
>>>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>>   {
>>
>
Paneer Selvam, Arunpravin March 19, 2024, 1:57 p.m. UTC | #4
Hi Christian,

On 3/19/2024 7:17 PM, Christian König wrote:
> Am 19.03.24 um 12:41 schrieb Paneer Selvam, Arunpravin:
>> Hi Christian,
>>
>> On 3/19/2024 3:58 PM, Christian König wrote:
>>>
>>>
>>> Am 18.03.24 um 22:40 schrieb Arunpravin Paneer Selvam:
>>>> Add clear page support in vram memory region.
>>>>
>>>> v1(Christian):
>>>>    - Dont handle clear page as TTM flag since when moving the BO back
>>>>      in from GTT again we don't need that.
>>>>    - Make a specialized version of amdgpu_fill_buffer() which only
>>>>      clears the VRAM areas which are not already cleared
>>>>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>>>      amdgpu_object.c
>>>>
>>>> v2:
>>>>    - Modify the function name amdgpu_ttm_* (Alex)
>>>>    - Drop the delayed parameter (Christian)
>>>>    - handle amdgpu_res_cleared(&cursor) just above the size
>>>>      calculation (Christian)
>>>>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the 
>>>> buffers
>>>>      in the free path to properly wait for fences etc.. (Christian)
>>>>
>>>> v3(Christian):
>>>>    - Remove buffer clear code in VRAM manager instead change the
>>>>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>>>      the DRM_BUDDY_CLEARED flag.
>>>>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>>>
>>> Just a few nit picks below, but in general already looks good to me.
>>>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 
>>>> ++++++++++++++++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>>>   6 files changed, 111 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index 8bc79924d171..c92d92b28a57 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -39,6 +39,7 @@
>>>>   #include "amdgpu.h"
>>>>   #include "amdgpu_trace.h"
>>>>   #include "amdgpu_amdkfd.h"
>>>> +#include "amdgpu_vram_mgr.h"
>>>>     /**
>>>>    * DOC: amdgpu_object
>>>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>       if (!amdgpu_bo_support_uswc(bo->flags))
>>>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>>>   -    if (adev->ras_enabled)
>>>> -        bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>> +    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>>         bo->tbo.bdev = &adev->mman.bdev;
>>>>       if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>>>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>         if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>>>           bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>>>> -        struct dma_fence *fence;
>>>> +        struct dma_fence *fence = NULL;
>>>>   -        r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, 
>>>> true);
>>>> +        r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>>>           if (unlikely(r))
>>>>               goto fail_unreserve;
>>>>   -        dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>> -                   DMA_RESV_USAGE_KERNEL);
>>>> -        dma_fence_put(fence);
>>>> +        if (fence) {
>>>> +            dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>> +                       DMA_RESV_USAGE_KERNEL);
>>>> +            dma_fence_put(fence);
>>>> +        }
>>>>       }
>>>>       if (!bp->resv)
>>>>           amdgpu_bo_unreserve(bo);
>>>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
>>>> ttm_buffer_object *bo)
>>>>       if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>>>           return;
>>>>   -    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
>>>> &fence, true);
>>>> +    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>>>       if (!WARN_ON(r)) {
>>>> +        struct amdgpu_vram_mgr_resource *vres;
>>>> +
>>>> +        vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>> +        vres->flags |= DRM_BUDDY_CLEARED;
>>>
>>> Those lines should probably be in the VRAM manager.
>> This flag is set based on the amdgpu_fill_buffer() function return 
>> value, can we move the amdgpu_fill_buffer() function call and
>> DRM_BUDDY_CLEARED flag set lines to amdgpu_vram_mgr_del() in VRAM 
>> manager and does it require to wipe the VRAM buffers here as well
>> without setting the DRM_BUDDY_CLEARED.
>
> No that won't work. The call to amdgpu_fill_buffer() *must* be here 
> because that here is called before the resource is actually freed.
>
> Only setting the vres->flags should probably be moved into 
> amdgpu_vram_mgr.c.
>
> So instead of calling that manually here just make a function for it 
> to keep the code related to the buddy allocator in one place.
I got it, Thanks.

Regards,
Arun.
>
> Regards,
> Christian.
>
>>>
>>>>           amdgpu_bo_fence(abo, fence, false);
>>>>           dma_fence_put(fence);
>>>>       }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>> index 381101d2bf05..50fcd86e1033 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
>>>> amdgpu_res_cursor *cur, uint64_t size)
>>>>       }
>>>>   }
>>>>   +/**
>>>> + * amdgpu_res_cleared - check if blocks are cleared
>>>> + *
>>>> + * @cur: the cursor to extract the block
>>>> + *
>>>> + * Check if the @cur block is cleared
>>>> + */
>>>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>>>> +{
>>>> +    struct drm_buddy_block *block;
>>>> +
>>>> +    switch (cur->mem_type) {
>>>> +    case TTM_PL_VRAM:
>>>> +        block = cur->node;
>>>> +
>>>> +        if (!amdgpu_vram_mgr_is_cleared(block))
>>>> +            return false;
>>>> +        break;
>>>> +    default:
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>>   #endif
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 8722beba494e..bcbffe909b47 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct 
>>>> ttm_buffer_object *bo,
>>>>           (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>>>           struct dma_fence *wipe_fence = NULL;
>>>>   -        r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, 
>>>> &wipe_fence,
>>>> -                    false);
>>>> +        r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>>>> +                       false);
>>>>           if (r) {
>>>>               goto error;
>>>>           } else if (wipe_fence) {
>>>> +            struct amdgpu_vram_mgr_resource *vres;
>>>> +
>>>> +            vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>> +            vres->flags |= DRM_BUDDY_CLEARED;
>> Does it require to set the DRM_BUDDY_CLEARED flag as we are not 
>> freeing the VRAM buffers?
>>>>               dma_fence_put(fence);
>>>>               fence = wipe_fence;
>>>>           }
>>>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct 
>>>> amdgpu_ring *ring, uint32_t src_data,
>>>>       return 0;
>>>>   }
>>>
>>> Some kerneldoc here what the function does would ne nice to have.
>>>
>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>> +                struct dma_resv *resv,
>>>> +                struct dma_fence **fence)
>>>> +{
>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>> +    struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>>> +    struct amdgpu_res_cursor cursor;
>>>> +    struct dma_fence *f = NULL;
>>>
>>> It might be cleaner to just use the stub fence here (see 
>>> dma_fence_get_stub()).
>>>
>>> This would avoid to local variable init in the caller and the if 
>>> which checks if the function returned a fence or not.
>>>
>>>> +    u64 addr;
>>>> +    int r;
>>>> +
>>>> +    if (!adev->mman.buffer_funcs_enabled)
>>>> +        return -EINVAL;
>>>> +
>>>> +    amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), 
>>>> &cursor);
>>>> +
>>>> +    mutex_lock(&adev->mman.gtt_window_lock);
>>>> +    while (cursor.remaining) {
>>>> +        struct dma_fence *next = NULL;
>>>> +        u64 size;
>>>> +
>>>> +        if (amdgpu_res_cleared(&cursor)) {
>>>> +            amdgpu_res_next(&cursor, cursor.size);
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        /* Never clear more than 256MiB at once to avoid timeouts */
>>>> +        size = min(cursor.size, 256ULL << 20);
>>>> +
>>>> +        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, 
>>>> &cursor,
>>>> +                      1, ring, false, &size, &addr);
>>>> +        if (r)
>>>> +            goto err;
>>>> +
>>>> +        r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>>>> +                    &next, true, true);
>>>> +        if (r)
>>>> +            goto err;
>>>> +
>>>> +        dma_fence_put(f);
>>>> +        f = next;
>>>> +
>>>> +        amdgpu_res_next(&cursor, size);
>>>> +    }
>>>> +err:
>>>> +    mutex_unlock(&adev->mman.gtt_window_lock);
>>>> +    if (fence)
>>>
>>> Just make fence a mandatory parameter and drop the if and the 
>>> get/put dance.
>>>
>>>> +        *fence = dma_fence_get(f);
>>>> +    dma_fence_put(f);
>>>> +
>>>> +    return r;
>>>> +}
>>>> +
>>>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>               uint32_t src_data,
>>>>               struct dma_resv *resv,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> index 65ec82141a8e..b404d89d52e5 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> @@ -38,8 +38,6 @@
>>>>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
>>>>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2
>>>>   -#define AMDGPU_POISON    0xd0bed0be
>>>> -
>>>>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>>   @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct 
>>>> amdgpu_device *adev,
>>>>                      uint64_t size, bool tmz,
>>>>                      struct dma_resv *resv,
>>>>                      struct dma_fence **f);
>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>> +                struct dma_resv *resv,
>>>> +                struct dma_fence **fence);
>>>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>               uint32_t src_data,
>>>>               struct dma_resv *resv,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> index c0c851409241..e494f5bf136a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct 
>>>> ttm_resource_manager *man,
>>>>   {
>>>>       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>       struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>> +    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>>>       u64 vis_usage = 0, max_bytes, min_block_size;
>>>>       struct amdgpu_vram_mgr_resource *vres;
>>>>       u64 size, remaining_size, lpfn, fpfn;
>>>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct 
>>>> ttm_resource_manager *man,
>>>>       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>           vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>>>   +    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>>>> +        vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>>>> +
>>>
>>> Mhm, you should probably *not* store this flags in the vres structure.
>>>
>>> As soon as the BO is used the VRAM wouldn't be cleared any more.
>>>
>>> Regards,
>>> Christian.
>>>
>>>>       if (fpfn || lpfn != mgr->mm.size)
>>>>           /* Allocate blocks in desired range */
>>>>           vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct 
>>>> ttm_resource_manager *man,
>>>>         amdgpu_vram_mgr_do_reserve(man);
>>>>   -    drm_buddy_free_list(mm, &vres->blocks, 0);
>>>> +    drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>>>       mutex_unlock(&mgr->lock);
>>>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>> index 0e04e42cf809..8478522d7366 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>> @@ -53,6 +53,11 @@ static inline u64 
>>>> amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>>>       return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>>>   }
>>>>   +static inline bool amdgpu_vram_mgr_is_cleared(struct 
>>>> drm_buddy_block *block)
>>>> +{
>>>> +    return drm_buddy_block_is_clear(block);
>>>> +}
>>>> +
>>>>   static inline struct amdgpu_vram_mgr_resource *
>>>>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>>>   {
>>>
>>
>
Alex Deucher March 26, 2024, 1:38 p.m. UTC | #5
On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
<Arunpravin.PaneerSelvam@amd.com> wrote:
>
> Add clear page support in vram memory region.
>
> v1(Christian):
>   - Dont handle clear page as TTM flag since when moving the BO back
>     in from GTT again we don't need that.
>   - Make a specialized version of amdgpu_fill_buffer() which only
>     clears the VRAM areas which are not already cleared
>   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>     amdgpu_object.c
>
> v2:
>   - Modify the function name amdgpu_ttm_* (Alex)
>   - Drop the delayed parameter (Christian)
>   - handle amdgpu_res_cleared(&cursor) just above the size
>     calculation (Christian)
>   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>     in the free path to properly wait for fences etc.. (Christian)
>
> v3(Christian):
>   - Remove buffer clear code in VRAM manager instead change the
>     AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>     the DRM_BUDDY_CLEARED flag.
>   - Remove ! from amdgpu_res_cleared(&cursor) check.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>  6 files changed, 111 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 8bc79924d171..c92d92b28a57 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -39,6 +39,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_amdkfd.h"
> +#include "amdgpu_vram_mgr.h"
>
>  /**
>   * DOC: amdgpu_object
> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>         if (!amdgpu_bo_support_uswc(bo->flags))
>                 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>
> -       if (adev->ras_enabled)
> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>
>         bo->tbo.bdev = &adev->mman.bdev;
>         if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>
>         if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>             bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> -               struct dma_fence *fence;
> +               struct dma_fence *fence = NULL;
>
> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>                 if (unlikely(r))
>                         goto fail_unreserve;
>
> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
> -                                  DMA_RESV_USAGE_KERNEL);
> -               dma_fence_put(fence);
> +               if (fence) {
> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
> +                                          DMA_RESV_USAGE_KERNEL);
> +                       dma_fence_put(fence);
> +               }
>         }
>         if (!bp->resv)
>                 amdgpu_bo_unreserve(bo);
> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>         if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>                 return;
>
> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>         if (!WARN_ON(r)) {
> +               struct amdgpu_vram_mgr_resource *vres;
> +
> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +               vres->flags |= DRM_BUDDY_CLEARED;
>                 amdgpu_bo_fence(abo, fence, false);
>                 dma_fence_put(fence);
>         }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 381101d2bf05..50fcd86e1033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>         }
>  }
>
> +/**
> + * amdgpu_res_cleared - check if blocks are cleared
> + *
> + * @cur: the cursor to extract the block
> + *
> + * Check if the @cur block is cleared
> + */
> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> +{
> +       struct drm_buddy_block *block;
> +
> +       switch (cur->mem_type) {
> +       case TTM_PL_VRAM:
> +               block = cur->node;
> +
> +               if (!amdgpu_vram_mgr_is_cleared(block))
> +                       return false;
> +               break;
> +       default:
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8722beba494e..bcbffe909b47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>             (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>                 struct dma_fence *wipe_fence = NULL;
>
> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> -                                       false);
> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> +                                      false);
>                 if (r) {
>                         goto error;
>                 } else if (wipe_fence) {
> +                       struct amdgpu_vram_mgr_resource *vres;
> +
> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +                       vres->flags |= DRM_BUDDY_CLEARED;
>                         dma_fence_put(fence);
>                         fence = wipe_fence;
>                 }
> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>         return 0;
>  }
>
> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +                           struct dma_resv *resv,
> +                           struct dma_fence **fence)
> +{
> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +       struct amdgpu_res_cursor cursor;
> +       struct dma_fence *f = NULL;
> +       u64 addr;
> +       int r;
> +
> +       if (!adev->mman.buffer_funcs_enabled)
> +               return -EINVAL;
> +
> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> +
> +       mutex_lock(&adev->mman.gtt_window_lock);
> +       while (cursor.remaining) {
> +               struct dma_fence *next = NULL;
> +               u64 size;
> +
> +               if (amdgpu_res_cleared(&cursor)) {
> +                       amdgpu_res_next(&cursor, cursor.size);
> +                       continue;
> +               }
> +
> +               /* Never clear more than 256MiB at once to avoid timeouts */
> +               size = min(cursor.size, 256ULL << 20);
> +
> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +                                         1, ring, false, &size, &addr);
> +               if (r)
> +                       goto err;
> +
> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> +                                       &next, true, true);
> +               if (r)
> +                       goto err;
> +
> +               dma_fence_put(f);
> +               f = next;
> +
> +               amdgpu_res_next(&cursor, size);
> +       }
> +err:
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +       if (fence)
> +               *fence = dma_fence_get(f);
> +       dma_fence_put(f);
> +
> +       return r;
> +}
> +
>  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         uint32_t src_data,
>                         struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 65ec82141a8e..b404d89d52e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -38,8 +38,6 @@
>  #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
>  #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
>
> -#define AMDGPU_POISON  0xd0bed0be
> -
>  extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>
> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>                                uint64_t size, bool tmz,
>                                struct dma_resv *resv,
>                                struct dma_fence **f);
> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +                           struct dma_resv *resv,
> +                           struct dma_fence **fence);
>  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         uint32_t src_data,
>                         struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index c0c851409241..e494f5bf136a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>  {
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>         u64 vis_usage = 0, max_bytes, min_block_size;
>         struct amdgpu_vram_mgr_resource *vres;
>         u64 size, remaining_size, lpfn, fpfn;
> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>         if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>                 vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>
> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;

Is there any reason to not always do this?

Alex


> +
>         if (fpfn || lpfn != mgr->mm.size)
>                 /* Allocate blocks in desired range */
>                 vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>
>         amdgpu_vram_mgr_do_reserve(man);
>
> -       drm_buddy_free_list(mm, &vres->blocks, 0);
> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>         mutex_unlock(&mgr->lock);
>
>         atomic64_sub(vis_usage, &mgr->vis_usage);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> index 0e04e42cf809..8478522d7366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>         return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>  }
>
> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> +{
> +       return drm_buddy_block_is_clear(block);
> +}
> +
>  static inline struct amdgpu_vram_mgr_resource *
>  to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>  {
> --
> 2.25.1
>
Paneer Selvam, Arunpravin March 26, 2024, 1:59 p.m. UTC | #6
Hi Alex,

On 3/26/2024 7:08 PM, Alex Deucher wrote:
> On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
> <Arunpravin.PaneerSelvam@amd.com> wrote:
>> Add clear page support in vram memory region.
>>
>> v1(Christian):
>>    - Dont handle clear page as TTM flag since when moving the BO back
>>      in from GTT again we don't need that.
>>    - Make a specialized version of amdgpu_fill_buffer() which only
>>      clears the VRAM areas which are not already cleared
>>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>      amdgpu_object.c
>>
>> v2:
>>    - Modify the function name amdgpu_ttm_* (Alex)
>>    - Drop the delayed parameter (Christian)
>>    - handle amdgpu_res_cleared(&cursor) just above the size
>>      calculation (Christian)
>>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>>      in the free path to properly wait for fences etc.. (Christian)
>>
>> v3(Christian):
>>    - Remove buffer clear code in VRAM manager instead change the
>>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>      the DRM_BUDDY_CLEARED flag.
>>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>>
>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>   6 files changed, 111 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 8bc79924d171..c92d92b28a57 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -39,6 +39,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_trace.h"
>>   #include "amdgpu_amdkfd.h"
>> +#include "amdgpu_vram_mgr.h"
>>
>>   /**
>>    * DOC: amdgpu_object
>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>          if (!amdgpu_bo_support_uswc(bo->flags))
>>                  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>
>> -       if (adev->ras_enabled)
>> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>
>>          bo->tbo.bdev = &adev->mman.bdev;
>>          if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>
>>          if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>              bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>> -               struct dma_fence *fence;
>> +               struct dma_fence *fence = NULL;
>>
>> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
>> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>                  if (unlikely(r))
>>                          goto fail_unreserve;
>>
>> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
>> -                                  DMA_RESV_USAGE_KERNEL);
>> -               dma_fence_put(fence);
>> +               if (fence) {
>> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
>> +                                          DMA_RESV_USAGE_KERNEL);
>> +                       dma_fence_put(fence);
>> +               }
>>          }
>>          if (!bp->resv)
>>                  amdgpu_bo_unreserve(bo);
>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>>          if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>                  return;
>>
>> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
>> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>          if (!WARN_ON(r)) {
>> +               struct amdgpu_vram_mgr_resource *vres;
>> +
>> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
>> +               vres->flags |= DRM_BUDDY_CLEARED;
>>                  amdgpu_bo_fence(abo, fence, false);
>>                  dma_fence_put(fence);
>>          }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> index 381101d2bf05..50fcd86e1033 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>>          }
>>   }
>>
>> +/**
>> + * amdgpu_res_cleared - check if blocks are cleared
>> + *
>> + * @cur: the cursor to extract the block
>> + *
>> + * Check if the @cur block is cleared
>> + */
>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>> +{
>> +       struct drm_buddy_block *block;
>> +
>> +       switch (cur->mem_type) {
>> +       case TTM_PL_VRAM:
>> +               block = cur->node;
>> +
>> +               if (!amdgpu_vram_mgr_is_cleared(block))
>> +                       return false;
>> +               break;
>> +       default:
>> +               return false;
>> +       }
>> +
>> +       return true;
>> +}
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 8722beba494e..bcbffe909b47 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>              (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>                  struct dma_fence *wipe_fence = NULL;
>>
>> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
>> -                                       false);
>> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>> +                                      false);
>>                  if (r) {
>>                          goto error;
>>                  } else if (wipe_fence) {
>> +                       struct amdgpu_vram_mgr_resource *vres;
>> +
>> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
>> +                       vres->flags |= DRM_BUDDY_CLEARED;
>>                          dma_fence_put(fence);
>>                          fence = wipe_fence;
>>                  }
>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>>          return 0;
>>   }
>>
>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> +                           struct dma_resv *resv,
>> +                           struct dma_fence **fence)
>> +{
>> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> +       struct amdgpu_res_cursor cursor;
>> +       struct dma_fence *f = NULL;
>> +       u64 addr;
>> +       int r;
>> +
>> +       if (!adev->mman.buffer_funcs_enabled)
>> +               return -EINVAL;
>> +
>> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
>> +
>> +       mutex_lock(&adev->mman.gtt_window_lock);
>> +       while (cursor.remaining) {
>> +               struct dma_fence *next = NULL;
>> +               u64 size;
>> +
>> +               if (amdgpu_res_cleared(&cursor)) {
>> +                       amdgpu_res_next(&cursor, cursor.size);
>> +                       continue;
>> +               }
>> +
>> +               /* Never clear more than 256MiB at once to avoid timeouts */
>> +               size = min(cursor.size, 256ULL << 20);
>> +
>> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>> +                                         1, ring, false, &size, &addr);
>> +               if (r)
>> +                       goto err;
>> +
>> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>> +                                       &next, true, true);
>> +               if (r)
>> +                       goto err;
>> +
>> +               dma_fence_put(f);
>> +               f = next;
>> +
>> +               amdgpu_res_next(&cursor, size);
>> +       }
>> +err:
>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>> +       if (fence)
>> +               *fence = dma_fence_get(f);
>> +       dma_fence_put(f);
>> +
>> +       return r;
>> +}
>> +
>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>                          uint32_t src_data,
>>                          struct dma_resv *resv,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 65ec82141a8e..b404d89d52e5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -38,8 +38,6 @@
>>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
>>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
>>
>> -#define AMDGPU_POISON  0xd0bed0be
>> -
>>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>
>> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>>                                 uint64_t size, bool tmz,
>>                                 struct dma_resv *resv,
>>                                 struct dma_fence **f);
>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>> +                           struct dma_resv *resv,
>> +                           struct dma_fence **fence);
>>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>                          uint32_t src_data,
>>                          struct dma_resv *resv,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index c0c851409241..e494f5bf136a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>   {
>>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>          u64 vis_usage = 0, max_bytes, min_block_size;
>>          struct amdgpu_vram_mgr_resource *vres;
>>          u64 size, remaining_size, lpfn, fpfn;
>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>          if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>                  vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>
>> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> Is there any reason to not always do this?
Here we are trying to keep a pool of cleared memory which are cleared on 
free path and that can given
to the application which requires a zeroed memory. I think here if we 
set always clear the memory,
we would go over the limit of cleared memory in that particular instance 
and the application should wait until
the hardware clears the memory and this might impact the overall 
performance.

Thanks,
Arun.
>
> Alex
>
>
>> +
>>          if (fpfn || lpfn != mgr->mm.size)
>>                  /* Allocate blocks in desired range */
>>                  vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>>
>>          amdgpu_vram_mgr_do_reserve(man);
>>
>> -       drm_buddy_free_list(mm, &vres->blocks, 0);
>> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>          mutex_unlock(&mgr->lock);
>>
>>          atomic64_sub(vis_usage, &mgr->vis_usage);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> index 0e04e42cf809..8478522d7366 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>          return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>   }
>>
>> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
>> +{
>> +       return drm_buddy_block_is_clear(block);
>> +}
>> +
>>   static inline struct amdgpu_vram_mgr_resource *
>>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>   {
>> --
>> 2.25.1
>>
Alex Deucher March 26, 2024, 2:01 p.m. UTC | #7
On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
<arunpravin.paneerselvam@amd.com> wrote:
>
> Hi Alex,
>
> On 3/26/2024 7:08 PM, Alex Deucher wrote:
> > On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
> > <Arunpravin.PaneerSelvam@amd.com> wrote:
> >> Add clear page support in vram memory region.
> >>
> >> v1(Christian):
> >>    - Dont handle clear page as TTM flag since when moving the BO back
> >>      in from GTT again we don't need that.
> >>    - Make a specialized version of amdgpu_fill_buffer() which only
> >>      clears the VRAM areas which are not already cleared
> >>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
> >>      amdgpu_object.c
> >>
> >> v2:
> >>    - Modify the function name amdgpu_ttm_* (Alex)
> >>    - Drop the delayed parameter (Christian)
> >>    - handle amdgpu_res_cleared(&cursor) just above the size
> >>      calculation (Christian)
> >>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
> >>      in the free path to properly wait for fences etc.. (Christian)
> >>
> >> v3(Christian):
> >>    - Remove buffer clear code in VRAM manager instead change the
> >>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
> >>      the DRM_BUDDY_CLEARED flag.
> >>    - Remove ! from amdgpu_res_cleared(&cursor) check.
> >>
> >> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> >> Suggested-by: Christian König <christian.koenig@amd.com>
> >> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
> >>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
> >>   6 files changed, 111 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index 8bc79924d171..c92d92b28a57 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -39,6 +39,7 @@
> >>   #include "amdgpu.h"
> >>   #include "amdgpu_trace.h"
> >>   #include "amdgpu_amdkfd.h"
> >> +#include "amdgpu_vram_mgr.h"
> >>
> >>   /**
> >>    * DOC: amdgpu_object
> >> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> >>          if (!amdgpu_bo_support_uswc(bo->flags))
> >>                  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> >>
> >> -       if (adev->ras_enabled)
> >> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> >> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> >>
> >>          bo->tbo.bdev = &adev->mman.bdev;
> >>          if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
> >> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> >>
> >>          if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
> >>              bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> >> -               struct dma_fence *fence;
> >> +               struct dma_fence *fence = NULL;
> >>
> >> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> >> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
> >>                  if (unlikely(r))
> >>                          goto fail_unreserve;
> >>
> >> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
> >> -                                  DMA_RESV_USAGE_KERNEL);
> >> -               dma_fence_put(fence);
> >> +               if (fence) {
> >> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
> >> +                                          DMA_RESV_USAGE_KERNEL);
> >> +                       dma_fence_put(fence);
> >> +               }
> >>          }
> >>          if (!bp->resv)
> >>                  amdgpu_bo_unreserve(bo);
> >> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> >>          if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
> >>                  return;
> >>
> >> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> >> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
> >>          if (!WARN_ON(r)) {
> >> +               struct amdgpu_vram_mgr_resource *vres;
> >> +
> >> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
> >> +               vres->flags |= DRM_BUDDY_CLEARED;
> >>                  amdgpu_bo_fence(abo, fence, false);
> >>                  dma_fence_put(fence);
> >>          }
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> >> index 381101d2bf05..50fcd86e1033 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> >> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
> >>          }
> >>   }
> >>
> >> +/**
> >> + * amdgpu_res_cleared - check if blocks are cleared
> >> + *
> >> + * @cur: the cursor to extract the block
> >> + *
> >> + * Check if the @cur block is cleared
> >> + */
> >> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> >> +{
> >> +       struct drm_buddy_block *block;
> >> +
> >> +       switch (cur->mem_type) {
> >> +       case TTM_PL_VRAM:
> >> +               block = cur->node;
> >> +
> >> +               if (!amdgpu_vram_mgr_is_cleared(block))
> >> +                       return false;
> >> +               break;
> >> +       default:
> >> +               return false;
> >> +       }
> >> +
> >> +       return true;
> >> +}
> >> +
> >>   #endif
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index 8722beba494e..bcbffe909b47 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> >>              (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> >>                  struct dma_fence *wipe_fence = NULL;
> >>
> >> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> >> -                                       false);
> >> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> >> +                                      false);
> >>                  if (r) {
> >>                          goto error;
> >>                  } else if (wipe_fence) {
> >> +                       struct amdgpu_vram_mgr_resource *vres;
> >> +
> >> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
> >> +                       vres->flags |= DRM_BUDDY_CLEARED;
> >>                          dma_fence_put(fence);
> >>                          fence = wipe_fence;
> >>                  }
> >> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> >>          return 0;
> >>   }
> >>
> >> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> >> +                           struct dma_resv *resv,
> >> +                           struct dma_fence **fence)
> >> +{
> >> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> >> +       struct amdgpu_res_cursor cursor;
> >> +       struct dma_fence *f = NULL;
> >> +       u64 addr;
> >> +       int r;
> >> +
> >> +       if (!adev->mman.buffer_funcs_enabled)
> >> +               return -EINVAL;
> >> +
> >> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> >> +
> >> +       mutex_lock(&adev->mman.gtt_window_lock);
> >> +       while (cursor.remaining) {
> >> +               struct dma_fence *next = NULL;
> >> +               u64 size;
> >> +
> >> +               if (amdgpu_res_cleared(&cursor)) {
> >> +                       amdgpu_res_next(&cursor, cursor.size);
> >> +                       continue;
> >> +               }
> >> +
> >> +               /* Never clear more than 256MiB at once to avoid timeouts */
> >> +               size = min(cursor.size, 256ULL << 20);
> >> +
> >> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> >> +                                         1, ring, false, &size, &addr);
> >> +               if (r)
> >> +                       goto err;
> >> +
> >> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> >> +                                       &next, true, true);
> >> +               if (r)
> >> +                       goto err;
> >> +
> >> +               dma_fence_put(f);
> >> +               f = next;
> >> +
> >> +               amdgpu_res_next(&cursor, size);
> >> +       }
> >> +err:
> >> +       mutex_unlock(&adev->mman.gtt_window_lock);
> >> +       if (fence)
> >> +               *fence = dma_fence_get(f);
> >> +       dma_fence_put(f);
> >> +
> >> +       return r;
> >> +}
> >> +
> >>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> >>                          uint32_t src_data,
> >>                          struct dma_resv *resv,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >> index 65ec82141a8e..b404d89d52e5 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> >> @@ -38,8 +38,6 @@
> >>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
> >>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
> >>
> >> -#define AMDGPU_POISON  0xd0bed0be
> >> -
> >>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> >>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> >>
> >> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> >>                                 uint64_t size, bool tmz,
> >>                                 struct dma_resv *resv,
> >>                                 struct dma_fence **f);
> >> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> >> +                           struct dma_resv *resv,
> >> +                           struct dma_fence **fence);
> >>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> >>                          uint32_t src_data,
> >>                          struct dma_resv *resv,
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >> index c0c851409241..e494f5bf136a 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> >> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> >>   {
> >>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> >>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
> >> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
> >>          u64 vis_usage = 0, max_bytes, min_block_size;
> >>          struct amdgpu_vram_mgr_resource *vres;
> >>          u64 size, remaining_size, lpfn, fpfn;
> >> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> >>          if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> >>                  vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
> >>
> >> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> >> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> > Is there any reason to not always do this?
> Here we are trying to keep a pool of cleared memory which are cleared on
> free path and that can given
> to the application which requires a zeroed memory. I think here if we
> set always clear the memory,
> we would go over the limit of cleared memory in that particular instance
> and the application should wait until
> the hardware clears the memory and this might impact the overall
> performance.

I'd like to have the driver always deliver cleared memory.

Alex

>
> Thanks,
> Arun.
> >
> > Alex
> >
> >
> >> +
> >>          if (fpfn || lpfn != mgr->mm.size)
> >>                  /* Allocate blocks in desired range */
> >>                  vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> >> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
> >>
> >>          amdgpu_vram_mgr_do_reserve(man);
> >>
> >> -       drm_buddy_free_list(mm, &vres->blocks, 0);
> >> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
> >>          mutex_unlock(&mgr->lock);
> >>
> >>          atomic64_sub(vis_usage, &mgr->vis_usage);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> >> index 0e04e42cf809..8478522d7366 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> >> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
> >>          return (u64)PAGE_SIZE << drm_buddy_block_order(block);
> >>   }
> >>
> >> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> >> +{
> >> +       return drm_buddy_block_is_clear(block);
> >> +}
> >> +
> >>   static inline struct amdgpu_vram_mgr_resource *
> >>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
> >>   {
> >> --
> >> 2.25.1
> >>
>
Alex Deucher March 26, 2024, 2:53 p.m. UTC | #8
On Tue, Mar 26, 2024 at 10:01 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
> <arunpravin.paneerselvam@amd.com> wrote:
> >
> > Hi Alex,
> >
> > On 3/26/2024 7:08 PM, Alex Deucher wrote:
> > > On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
> > > <Arunpravin.PaneerSelvam@amd.com> wrote:
> > >> Add clear page support in vram memory region.
> > >>
> > >> v1(Christian):
> > >>    - Dont handle clear page as TTM flag since when moving the BO back
> > >>      in from GTT again we don't need that.
> > >>    - Make a specialized version of amdgpu_fill_buffer() which only
> > >>      clears the VRAM areas which are not already cleared
> > >>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
> > >>      amdgpu_object.c
> > >>
> > >> v2:
> > >>    - Modify the function name amdgpu_ttm_* (Alex)
> > >>    - Drop the delayed parameter (Christian)
> > >>    - handle amdgpu_res_cleared(&cursor) just above the size
> > >>      calculation (Christian)
> > >>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
> > >>      in the free path to properly wait for fences etc.. (Christian)
> > >>
> > >> v3(Christian):
> > >>    - Remove buffer clear code in VRAM manager instead change the
> > >>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
> > >>      the DRM_BUDDY_CLEARED flag.
> > >>    - Remove ! from amdgpu_res_cleared(&cursor) check.
> > >>
> > >> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> > >> Suggested-by: Christian König <christian.koenig@amd.com>
> > >> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> > >> ---
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
> > >>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
> > >>   6 files changed, 111 insertions(+), 13 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > >> index 8bc79924d171..c92d92b28a57 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > >> @@ -39,6 +39,7 @@
> > >>   #include "amdgpu.h"
> > >>   #include "amdgpu_trace.h"
> > >>   #include "amdgpu_amdkfd.h"
> > >> +#include "amdgpu_vram_mgr.h"
> > >>
> > >>   /**
> > >>    * DOC: amdgpu_object
> > >> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> > >>          if (!amdgpu_bo_support_uswc(bo->flags))
> > >>                  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> > >>
> > >> -       if (adev->ras_enabled)
> > >> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> > >> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> > >>
> > >>          bo->tbo.bdev = &adev->mman.bdev;
> > >>          if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
> > >> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> > >>
> > >>          if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
> > >>              bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> > >> -               struct dma_fence *fence;
> > >> +               struct dma_fence *fence = NULL;
> > >>
> > >> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> > >> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
> > >>                  if (unlikely(r))
> > >>                          goto fail_unreserve;
> > >>
> > >> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
> > >> -                                  DMA_RESV_USAGE_KERNEL);
> > >> -               dma_fence_put(fence);
> > >> +               if (fence) {
> > >> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
> > >> +                                          DMA_RESV_USAGE_KERNEL);
> > >> +                       dma_fence_put(fence);
> > >> +               }
> > >>          }
> > >>          if (!bp->resv)
> > >>                  amdgpu_bo_unreserve(bo);
> > >> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> > >>          if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
> > >>                  return;
> > >>
> > >> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> > >> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
> > >>          if (!WARN_ON(r)) {
> > >> +               struct amdgpu_vram_mgr_resource *vres;
> > >> +
> > >> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
> > >> +               vres->flags |= DRM_BUDDY_CLEARED;
> > >>                  amdgpu_bo_fence(abo, fence, false);
> > >>                  dma_fence_put(fence);
> > >>          }
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > >> index 381101d2bf05..50fcd86e1033 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > >> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
> > >>          }
> > >>   }
> > >>
> > >> +/**
> > >> + * amdgpu_res_cleared - check if blocks are cleared
> > >> + *
> > >> + * @cur: the cursor to extract the block
> > >> + *
> > >> + * Check if the @cur block is cleared
> > >> + */
> > >> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> > >> +{
> > >> +       struct drm_buddy_block *block;
> > >> +
> > >> +       switch (cur->mem_type) {
> > >> +       case TTM_PL_VRAM:
> > >> +               block = cur->node;
> > >> +
> > >> +               if (!amdgpu_vram_mgr_is_cleared(block))
> > >> +                       return false;
> > >> +               break;
> > >> +       default:
> > >> +               return false;
> > >> +       }
> > >> +
> > >> +       return true;
> > >> +}
> > >> +
> > >>   #endif
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > >> index 8722beba494e..bcbffe909b47 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > >> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> > >>              (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> > >>                  struct dma_fence *wipe_fence = NULL;
> > >>
> > >> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> > >> -                                       false);
> > >> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> > >> +                                      false);
> > >>                  if (r) {
> > >>                          goto error;
> > >>                  } else if (wipe_fence) {
> > >> +                       struct amdgpu_vram_mgr_resource *vres;
> > >> +
> > >> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
> > >> +                       vres->flags |= DRM_BUDDY_CLEARED;
> > >>                          dma_fence_put(fence);
> > >>                          fence = wipe_fence;
> > >>                  }
> > >> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> > >>          return 0;
> > >>   }
> > >>
> > >> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> > >> +                           struct dma_resv *resv,
> > >> +                           struct dma_fence **fence)
> > >> +{
> > >> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > >> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> > >> +       struct amdgpu_res_cursor cursor;
> > >> +       struct dma_fence *f = NULL;
> > >> +       u64 addr;
> > >> +       int r;
> > >> +
> > >> +       if (!adev->mman.buffer_funcs_enabled)
> > >> +               return -EINVAL;
> > >> +
> > >> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> > >> +
> > >> +       mutex_lock(&adev->mman.gtt_window_lock);
> > >> +       while (cursor.remaining) {
> > >> +               struct dma_fence *next = NULL;
> > >> +               u64 size;
> > >> +
> > >> +               if (amdgpu_res_cleared(&cursor)) {
> > >> +                       amdgpu_res_next(&cursor, cursor.size);
> > >> +                       continue;
> > >> +               }
> > >> +
> > >> +               /* Never clear more than 256MiB at once to avoid timeouts */
> > >> +               size = min(cursor.size, 256ULL << 20);
> > >> +
> > >> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> > >> +                                         1, ring, false, &size, &addr);
> > >> +               if (r)
> > >> +                       goto err;
> > >> +
> > >> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> > >> +                                       &next, true, true);
> > >> +               if (r)
> > >> +                       goto err;
> > >> +
> > >> +               dma_fence_put(f);
> > >> +               f = next;
> > >> +
> > >> +               amdgpu_res_next(&cursor, size);
> > >> +       }
> > >> +err:
> > >> +       mutex_unlock(&adev->mman.gtt_window_lock);
> > >> +       if (fence)
> > >> +               *fence = dma_fence_get(f);
> > >> +       dma_fence_put(f);
> > >> +
> > >> +       return r;
> > >> +}
> > >> +
> > >>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> > >>                          uint32_t src_data,
> > >>                          struct dma_resv *resv,
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > >> index 65ec82141a8e..b404d89d52e5 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > >> @@ -38,8 +38,6 @@
> > >>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
> > >>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
> > >>
> > >> -#define AMDGPU_POISON  0xd0bed0be
> > >> -
> > >>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> > >>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> > >>
> > >> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> > >>                                 uint64_t size, bool tmz,
> > >>                                 struct dma_resv *resv,
> > >>                                 struct dma_fence **f);
> > >> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> > >> +                           struct dma_resv *resv,
> > >> +                           struct dma_fence **fence);
> > >>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> > >>                          uint32_t src_data,
> > >>                          struct dma_resv *resv,
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > >> index c0c851409241..e494f5bf136a 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > >> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> > >>   {
> > >>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> > >>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
> > >> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
> > >>          u64 vis_usage = 0, max_bytes, min_block_size;
> > >>          struct amdgpu_vram_mgr_resource *vres;
> > >>          u64 size, remaining_size, lpfn, fpfn;
> > >> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> > >>          if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> > >>                  vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
> > >>
> > >> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> > >> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> > > Is there any reason to not always do this?
> > Here we are trying to keep a pool of cleared memory which are cleared on
> > free path and that can given
> > to the application which requires a zeroed memory. I think here if we
> > set always clear the memory,
> > we would go over the limit of cleared memory in that particular instance
> > and the application should wait until
> > the hardware clears the memory and this might impact the overall
> > performance.
>
> I'd like to have the driver always deliver cleared memory.

Actually, I think we can just always set the flag in the allocation IOCTLs.

Alex

>
> Alex
>
> >
> > Thanks,
> > Arun.
> > >
> > > Alex
> > >
> > >
> > >> +
> > >>          if (fpfn || lpfn != mgr->mm.size)
> > >>                  /* Allocate blocks in desired range */
> > >>                  vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> > >> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
> > >>
> > >>          amdgpu_vram_mgr_do_reserve(man);
> > >>
> > >> -       drm_buddy_free_list(mm, &vres->blocks, 0);
> > >> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
> > >>          mutex_unlock(&mgr->lock);
> > >>
> > >>          atomic64_sub(vis_usage, &mgr->vis_usage);
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > >> index 0e04e42cf809..8478522d7366 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > >> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
> > >>          return (u64)PAGE_SIZE << drm_buddy_block_order(block);
> > >>   }
> > >>
> > >> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> > >> +{
> > >> +       return drm_buddy_block_is_clear(block);
> > >> +}
> > >> +
> > >>   static inline struct amdgpu_vram_mgr_resource *
> > >>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
> > >>   {
> > >> --
> > >> 2.25.1
> > >>
> >
Paneer Selvam, Arunpravin March 27, 2024, 8:23 a.m. UTC | #9
Hi Alex,

On 3/26/2024 8:23 PM, Alex Deucher wrote:
> On Tue, Mar 26, 2024 at 10:01 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
>> <arunpravin.paneerselvam@amd.com> wrote:
>>> Hi Alex,
>>>
>>> On 3/26/2024 7:08 PM, Alex Deucher wrote:
>>>> On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
>>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>>> Add clear page support in vram memory region.
>>>>>
>>>>> v1(Christian):
>>>>>     - Dont handle clear page as TTM flag since when moving the BO back
>>>>>       in from GTT again we don't need that.
>>>>>     - Make a specialized version of amdgpu_fill_buffer() which only
>>>>>       clears the VRAM areas which are not already cleared
>>>>>     - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>>>>       amdgpu_object.c
>>>>>
>>>>> v2:
>>>>>     - Modify the function name amdgpu_ttm_* (Alex)
>>>>>     - Drop the delayed parameter (Christian)
>>>>>     - handle amdgpu_res_cleared(&cursor) just above the size
>>>>>       calculation (Christian)
>>>>>     - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>>>>>       in the free path to properly wait for fences etc.. (Christian)
>>>>>
>>>>> v3(Christian):
>>>>>     - Remove buffer clear code in VRAM manager instead change the
>>>>>       AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>>>>       the DRM_BUDDY_CLEARED flag.
>>>>>     - Remove ! from amdgpu_res_cleared(&cursor) check.
>>>>>
>>>>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
>>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>>>>    6 files changed, 111 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> index 8bc79924d171..c92d92b28a57 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> @@ -39,6 +39,7 @@
>>>>>    #include "amdgpu.h"
>>>>>    #include "amdgpu_trace.h"
>>>>>    #include "amdgpu_amdkfd.h"
>>>>> +#include "amdgpu_vram_mgr.h"
>>>>>
>>>>>    /**
>>>>>     * DOC: amdgpu_object
>>>>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>>           if (!amdgpu_bo_support_uswc(bo->flags))
>>>>>                   bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>>>>
>>>>> -       if (adev->ras_enabled)
>>>>> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>>> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>>>
>>>>>           bo->tbo.bdev = &adev->mman.bdev;
>>>>>           if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>>>>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>>
>>>>>           if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>>>>               bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>>>>> -               struct dma_fence *fence;
>>>>> +               struct dma_fence *fence = NULL;
>>>>>
>>>>> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
>>>>> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>>>>                   if (unlikely(r))
>>>>>                           goto fail_unreserve;
>>>>>
>>>>> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>>> -                                  DMA_RESV_USAGE_KERNEL);
>>>>> -               dma_fence_put(fence);
>>>>> +               if (fence) {
>>>>> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>>> +                                          DMA_RESV_USAGE_KERNEL);
>>>>> +                       dma_fence_put(fence);
>>>>> +               }
>>>>>           }
>>>>>           if (!bp->resv)
>>>>>                   amdgpu_bo_unreserve(bo);
>>>>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>>>>>           if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>>>>                   return;
>>>>>
>>>>> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
>>>>> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>>>>           if (!WARN_ON(r)) {
>>>>> +               struct amdgpu_vram_mgr_resource *vres;
>>>>> +
>>>>> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>>> +               vres->flags |= DRM_BUDDY_CLEARED;
>>>>>                   amdgpu_bo_fence(abo, fence, false);
>>>>>                   dma_fence_put(fence);
>>>>>           }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> index 381101d2bf05..50fcd86e1033 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>>>>>           }
>>>>>    }
>>>>>
>>>>> +/**
>>>>> + * amdgpu_res_cleared - check if blocks are cleared
>>>>> + *
>>>>> + * @cur: the cursor to extract the block
>>>>> + *
>>>>> + * Check if the @cur block is cleared
>>>>> + */
>>>>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>>>>> +{
>>>>> +       struct drm_buddy_block *block;
>>>>> +
>>>>> +       switch (cur->mem_type) {
>>>>> +       case TTM_PL_VRAM:
>>>>> +               block = cur->node;
>>>>> +
>>>>> +               if (!amdgpu_vram_mgr_is_cleared(block))
>>>>> +                       return false;
>>>>> +               break;
>>>>> +       default:
>>>>> +               return false;
>>>>> +       }
>>>>> +
>>>>> +       return true;
>>>>> +}
>>>>> +
>>>>>    #endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> index 8722beba494e..bcbffe909b47 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>>>>               (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>>>>                   struct dma_fence *wipe_fence = NULL;
>>>>>
>>>>> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
>>>>> -                                       false);
>>>>> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>>>>> +                                      false);
>>>>>                   if (r) {
>>>>>                           goto error;
>>>>>                   } else if (wipe_fence) {
>>>>> +                       struct amdgpu_vram_mgr_resource *vres;
>>>>> +
>>>>> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>>> +                       vres->flags |= DRM_BUDDY_CLEARED;
>>>>>                           dma_fence_put(fence);
>>>>>                           fence = wipe_fence;
>>>>>                   }
>>>>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>>>>>           return 0;
>>>>>    }
>>>>>
>>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>>> +                           struct dma_resv *resv,
>>>>> +                           struct dma_fence **fence)
>>>>> +{
>>>>> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>>>> +       struct amdgpu_res_cursor cursor;
>>>>> +       struct dma_fence *f = NULL;
>>>>> +       u64 addr;
>>>>> +       int r;
>>>>> +
>>>>> +       if (!adev->mman.buffer_funcs_enabled)
>>>>> +               return -EINVAL;
>>>>> +
>>>>> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
>>>>> +
>>>>> +       mutex_lock(&adev->mman.gtt_window_lock);
>>>>> +       while (cursor.remaining) {
>>>>> +               struct dma_fence *next = NULL;
>>>>> +               u64 size;
>>>>> +
>>>>> +               if (amdgpu_res_cleared(&cursor)) {
>>>>> +                       amdgpu_res_next(&cursor, cursor.size);
>>>>> +                       continue;
>>>>> +               }
>>>>> +
>>>>> +               /* Never clear more than 256MiB at once to avoid timeouts */
>>>>> +               size = min(cursor.size, 256ULL << 20);
>>>>> +
>>>>> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>>>>> +                                         1, ring, false, &size, &addr);
>>>>> +               if (r)
>>>>> +                       goto err;
>>>>> +
>>>>> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>>>>> +                                       &next, true, true);
>>>>> +               if (r)
>>>>> +                       goto err;
>>>>> +
>>>>> +               dma_fence_put(f);
>>>>> +               f = next;
>>>>> +
>>>>> +               amdgpu_res_next(&cursor, size);
>>>>> +       }
>>>>> +err:
>>>>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>>>>> +       if (fence)
>>>>> +               *fence = dma_fence_get(f);
>>>>> +       dma_fence_put(f);
>>>>> +
>>>>> +       return r;
>>>>> +}
>>>>> +
>>>>>    int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>                           uint32_t src_data,
>>>>>                           struct dma_resv *resv,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> index 65ec82141a8e..b404d89d52e5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> @@ -38,8 +38,6 @@
>>>>>    #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
>>>>>    #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
>>>>>
>>>>> -#define AMDGPU_POISON  0xd0bed0be
>>>>> -
>>>>>    extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>>>>    extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>>>
>>>>> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>>>>>                                  uint64_t size, bool tmz,
>>>>>                                  struct dma_resv *resv,
>>>>>                                  struct dma_fence **f);
>>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>>> +                           struct dma_resv *resv,
>>>>> +                           struct dma_fence **fence);
>>>>>    int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>                           uint32_t src_data,
>>>>>                           struct dma_resv *resv,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> index c0c851409241..e494f5bf136a 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>    {
>>>>>           struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>>           struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>>>>           u64 vis_usage = 0, max_bytes, min_block_size;
>>>>>           struct amdgpu_vram_mgr_resource *vres;
>>>>>           u64 size, remaining_size, lpfn, fpfn;
>>>>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>           if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>                   vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>>>>
>>>>> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>>>>> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>>>> Is there any reason to not always do this?
>>> Here we are trying to keep a pool of cleared memory which are cleared on
>>> free path and that can given
>>> to the application which requires a zeroed memory. I think here if we
>>> set always clear the memory,
>>> we would go over the limit of cleared memory in that particular instance
>>> and the application should wait until
>>> the hardware clears the memory and this might impact the overall
>>> performance.
>> I'd like to have the driver always deliver cleared memory.
> Actually, I think we can just always set the flag in the allocation IOCTLs.
Sure, we can set the flag in the allocation IOCTLs.
Thanks,
Arun.
>
> Alex
>
>> Alex
>>
>>> Thanks,
>>> Arun.
>>>> Alex
>>>>
>>>>
>>>>> +
>>>>>           if (fpfn || lpfn != mgr->mm.size)
>>>>>                   /* Allocate blocks in desired range */
>>>>>                   vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>>>>>
>>>>>           amdgpu_vram_mgr_do_reserve(man);
>>>>>
>>>>> -       drm_buddy_free_list(mm, &vres->blocks, 0);
>>>>> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>>>>           mutex_unlock(&mgr->lock);
>>>>>
>>>>>           atomic64_sub(vis_usage, &mgr->vis_usage);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> index 0e04e42cf809..8478522d7366 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>>>>           return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>>>>    }
>>>>>
>>>>> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
>>>>> +{
>>>>> +       return drm_buddy_block_is_clear(block);
>>>>> +}
>>>>> +
>>>>>    static inline struct amdgpu_vram_mgr_resource *
>>>>>    to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>>>>    {
>>>>> --
>>>>> 2.25.1
>>>>>
Christian König April 2, 2024, 8:17 a.m. UTC | #10
Am 26.03.24 um 15:53 schrieb Alex Deucher:
> On Tue, Mar 26, 2024 at 10:01 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
>> <arunpravin.paneerselvam@amd.com> wrote:
>>> Hi Alex,
>>>
>>> On 3/26/2024 7:08 PM, Alex Deucher wrote:
>>>> On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
>>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>>> Add clear page support in vram memory region.
>>>>>
>>>>> v1(Christian):
>>>>>     - Dont handle clear page as TTM flag since when moving the BO back
>>>>>       in from GTT again we don't need that.
>>>>>     - Make a specialized version of amdgpu_fill_buffer() which only
>>>>>       clears the VRAM areas which are not already cleared
>>>>>     - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>>>>>       amdgpu_object.c
>>>>>
>>>>> v2:
>>>>>     - Modify the function name amdgpu_ttm_* (Alex)
>>>>>     - Drop the delayed parameter (Christian)
>>>>>     - handle amdgpu_res_cleared(&cursor) just above the size
>>>>>       calculation (Christian)
>>>>>     - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>>>>>       in the free path to properly wait for fences etc.. (Christian)
>>>>>
>>>>> v3(Christian):
>>>>>     - Remove buffer clear code in VRAM manager instead change the
>>>>>       AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>>>>>       the DRM_BUDDY_CLEARED flag.
>>>>>     - Remove ! from amdgpu_res_cleared(&cursor) check.
>>>>>
>>>>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
>>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>>> Acked-by: Felix Kuehling <felix.kuehling@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>>>>>    6 files changed, 111 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> index 8bc79924d171..c92d92b28a57 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> @@ -39,6 +39,7 @@
>>>>>    #include "amdgpu.h"
>>>>>    #include "amdgpu_trace.h"
>>>>>    #include "amdgpu_amdkfd.h"
>>>>> +#include "amdgpu_vram_mgr.h"
>>>>>
>>>>>    /**
>>>>>     * DOC: amdgpu_object
>>>>> @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>>           if (!amdgpu_bo_support_uswc(bo->flags))
>>>>>                   bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>>>>>
>>>>> -       if (adev->ras_enabled)
>>>>> -               bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>>> +       bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>>>>>
>>>>>           bo->tbo.bdev = &adev->mman.bdev;
>>>>>           if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
>>>>> @@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>>>>>
>>>>>           if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>>>>>               bo->tbo.resource->mem_type == TTM_PL_VRAM) {
>>>>> -               struct dma_fence *fence;
>>>>> +               struct dma_fence *fence = NULL;
>>>>>
>>>>> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
>>>>> +               r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>>>>>                   if (unlikely(r))
>>>>>                           goto fail_unreserve;
>>>>>
>>>>> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>>> -                                  DMA_RESV_USAGE_KERNEL);
>>>>> -               dma_fence_put(fence);
>>>>> +               if (fence) {
>>>>> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
>>>>> +                                          DMA_RESV_USAGE_KERNEL);
>>>>> +                       dma_fence_put(fence);
>>>>> +               }
>>>>>           }
>>>>>           if (!bp->resv)
>>>>>                   amdgpu_bo_unreserve(bo);
>>>>> @@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>>>>>           if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>>>>>                   return;
>>>>>
>>>>> -       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
>>>>> +       r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>>>>>           if (!WARN_ON(r)) {
>>>>> +               struct amdgpu_vram_mgr_resource *vres;
>>>>> +
>>>>> +               vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>>> +               vres->flags |= DRM_BUDDY_CLEARED;
>>>>>                   amdgpu_bo_fence(abo, fence, false);
>>>>>                   dma_fence_put(fence);
>>>>>           }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> index 381101d2bf05..50fcd86e1033 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>>>>> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>>>>>           }
>>>>>    }
>>>>>
>>>>> +/**
>>>>> + * amdgpu_res_cleared - check if blocks are cleared
>>>>> + *
>>>>> + * @cur: the cursor to extract the block
>>>>> + *
>>>>> + * Check if the @cur block is cleared
>>>>> + */
>>>>> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
>>>>> +{
>>>>> +       struct drm_buddy_block *block;
>>>>> +
>>>>> +       switch (cur->mem_type) {
>>>>> +       case TTM_PL_VRAM:
>>>>> +               block = cur->node;
>>>>> +
>>>>> +               if (!amdgpu_vram_mgr_is_cleared(block))
>>>>> +                       return false;
>>>>> +               break;
>>>>> +       default:
>>>>> +               return false;
>>>>> +       }
>>>>> +
>>>>> +       return true;
>>>>> +}
>>>>> +
>>>>>    #endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> index 8722beba494e..bcbffe909b47 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> @@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>>>>               (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>>>>>                   struct dma_fence *wipe_fence = NULL;
>>>>>
>>>>> -               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
>>>>> -                                       false);
>>>>> +               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
>>>>> +                                      false);
>>>>>                   if (r) {
>>>>>                           goto error;
>>>>>                   } else if (wipe_fence) {
>>>>> +                       struct amdgpu_vram_mgr_resource *vres;
>>>>> +
>>>>> +                       vres = to_amdgpu_vram_mgr_resource(bo->resource);
>>>>> +                       vres->flags |= DRM_BUDDY_CLEARED;
>>>>>                           dma_fence_put(fence);
>>>>>                           fence = wipe_fence;
>>>>>                   }
>>>>> @@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>>>>>           return 0;
>>>>>    }
>>>>>
>>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>>> +                           struct dma_resv *resv,
>>>>> +                           struct dma_fence **fence)
>>>>> +{
>>>>> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>>>>> +       struct amdgpu_res_cursor cursor;
>>>>> +       struct dma_fence *f = NULL;
>>>>> +       u64 addr;
>>>>> +       int r;
>>>>> +
>>>>> +       if (!adev->mman.buffer_funcs_enabled)
>>>>> +               return -EINVAL;
>>>>> +
>>>>> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
>>>>> +
>>>>> +       mutex_lock(&adev->mman.gtt_window_lock);
>>>>> +       while (cursor.remaining) {
>>>>> +               struct dma_fence *next = NULL;
>>>>> +               u64 size;
>>>>> +
>>>>> +               if (amdgpu_res_cleared(&cursor)) {
>>>>> +                       amdgpu_res_next(&cursor, cursor.size);
>>>>> +                       continue;
>>>>> +               }
>>>>> +
>>>>> +               /* Never clear more than 256MiB at once to avoid timeouts */
>>>>> +               size = min(cursor.size, 256ULL << 20);
>>>>> +
>>>>> +               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
>>>>> +                                         1, ring, false, &size, &addr);
>>>>> +               if (r)
>>>>> +                       goto err;
>>>>> +
>>>>> +               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
>>>>> +                                       &next, true, true);
>>>>> +               if (r)
>>>>> +                       goto err;
>>>>> +
>>>>> +               dma_fence_put(f);
>>>>> +               f = next;
>>>>> +
>>>>> +               amdgpu_res_next(&cursor, size);
>>>>> +       }
>>>>> +err:
>>>>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>>>>> +       if (fence)
>>>>> +               *fence = dma_fence_get(f);
>>>>> +       dma_fence_put(f);
>>>>> +
>>>>> +       return r;
>>>>> +}
>>>>> +
>>>>>    int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>                           uint32_t src_data,
>>>>>                           struct dma_resv *resv,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> index 65ec82141a8e..b404d89d52e5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> @@ -38,8 +38,6 @@
>>>>>    #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
>>>>>    #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
>>>>>
>>>>> -#define AMDGPU_POISON  0xd0bed0be
>>>>> -
>>>>>    extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>>>>>    extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>>>
>>>>> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>>>>>                                  uint64_t size, bool tmz,
>>>>>                                  struct dma_resv *resv,
>>>>>                                  struct dma_fence **f);
>>>>> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>>>>> +                           struct dma_resv *resv,
>>>>> +                           struct dma_fence **fence);
>>>>>    int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>                           uint32_t src_data,
>>>>>                           struct dma_resv *resv,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> index c0c851409241..e494f5bf136a 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>>> @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>    {
>>>>>           struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>>           struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>>>>>           u64 vis_usage = 0, max_bytes, min_block_size;
>>>>>           struct amdgpu_vram_mgr_resource *vres;
>>>>>           u64 size, remaining_size, lpfn, fpfn;
>>>>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>           if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>                   vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>>>>
>>>>> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>>>>> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>>>> Is there any reason to not always do this?
>>> Here we are trying to keep a pool of cleared memory which are cleared on
>>> free path and that can given
>>> to the application which requires a zeroed memory. I think here if we
>>> set always clear the memory,
>>> we would go over the limit of cleared memory in that particular instance
>>> and the application should wait until
>>> the hardware clears the memory and this might impact the overall
>>> performance.
>> I'd like to have the driver always deliver cleared memory.
> Actually, I think we can just always set the flag in the allocation IOCTLs.

We have use cases where that hurts as. Especially during boot when the 
backing VRAM isn't cleared yet.

That's one of the reasons why we never always cleared the memory.

Christian.

>
> Alex
>
>> Alex
>>
>>> Thanks,
>>> Arun.
>>>> Alex
>>>>
>>>>
>>>>> +
>>>>>           if (fpfn || lpfn != mgr->mm.size)
>>>>>                   /* Allocate blocks in desired range */
>>>>>                   vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>>> @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>>>>>
>>>>>           amdgpu_vram_mgr_do_reserve(man);
>>>>>
>>>>> -       drm_buddy_free_list(mm, &vres->blocks, 0);
>>>>> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>>>>>           mutex_unlock(&mgr->lock);
>>>>>
>>>>>           atomic64_sub(vis_usage, &mgr->vis_usage);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> index 0e04e42cf809..8478522d7366 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
>>>>> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>>>>>           return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>>>>>    }
>>>>>
>>>>> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
>>>>> +{
>>>>> +       return drm_buddy_block_is_clear(block);
>>>>> +}
>>>>> +
>>>>>    static inline struct amdgpu_vram_mgr_resource *
>>>>>    to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>>>>>    {
>>>>> --
>>>>> 2.25.1
>>>>>
Michel Dänzer April 3, 2024, 9:01 a.m. UTC | #11
On 2024-04-02 10:17, Christian König wrote:
> Am 26.03.24 um 15:53 schrieb Alex Deucher:
>> On Tue, Mar 26, 2024 at 10:01 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>>> On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
>>>>>> @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>>           if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>>                   vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>>>>>>
>>>>>> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
>>>>>> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
>>>>> Is there any reason to not always do this?
>>>> Here we are trying to keep a pool of cleared memory which are cleared on
>>>> free path and that can given
>>>> to the application which requires a zeroed memory. I think here if we
>>>> set always clear the memory,
>>>> we would go over the limit of cleared memory in that particular instance
>>>> and the application should wait until
>>>> the hardware clears the memory and this might impact the overall
>>>> performance.
>>> I'd like to have the driver always deliver cleared memory.
>> Actually, I think we can just always set the flag in the allocation IOCTLs.
> 
> We have use cases where that hurts as. Especially during boot when the backing VRAM isn't cleared yet.
> 
> That's one of the reasons why we never always cleared the memory.

Any such performance gain was only valid in the first place if the kernel delivering non-cleared memory to user space was considered acceptable, which it quite clearly isn't.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@ 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -601,8 +602,7 @@  int amdgpu_bo_create(struct amdgpu_device *adev,
 	if (!amdgpu_bo_support_uswc(bo->flags))
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-	if (adev->ras_enabled)
-		bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+	bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
 	bo->tbo.bdev = &adev->mman.bdev;
 	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@  int amdgpu_bo_create(struct amdgpu_device *adev,
 
 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-		struct dma_fence *fence;
+		struct dma_fence *fence = NULL;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+		r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;
 
-		dma_resv_add_fence(bo->tbo.base.resv, fence,
-				   DMA_RESV_USAGE_KERNEL);
-		dma_fence_put(fence);
+		if (fence) {
+			dma_resv_add_fence(bo->tbo.base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			dma_fence_put(fence);
+		}
 	}
 	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@  void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
 		return;
 
-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
 	if (!WARN_ON(r)) {
+		struct amdgpu_vram_mgr_resource *vres;
+
+		vres = to_amdgpu_vram_mgr_resource(bo->resource);
+		vres->flags |= DRM_BUDDY_CLEARED;
 		amdgpu_bo_fence(abo, fence, false);
 		dma_fence_put(fence);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@  static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 	}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		if (!amdgpu_vram_mgr_is_cleared(block))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..bcbffe909b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -378,11 +378,15 @@  static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 		struct dma_fence *wipe_fence = NULL;
 
-		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
-					false);
+		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+				       false);
 		if (r) {
 			goto error;
 		} else if (wipe_fence) {
+			struct amdgpu_vram_mgr_resource *vres;
+
+			vres = to_amdgpu_vram_mgr_resource(bo->resource);
+			vres->flags |= DRM_BUDDY_CLEARED;
 			dma_fence_put(fence);
 			fence = wipe_fence;
 		}
@@ -2214,6 +2218,59 @@  static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	return 0;
 }
 
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_res_cursor cursor;
+	struct dma_fence *f = NULL;
+	u64 addr;
+	int r;
+
+	if (!adev->mman.buffer_funcs_enabled)
+		return -EINVAL;
+
+	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+	while (cursor.remaining) {
+		struct dma_fence *next = NULL;
+		u64 size;
+
+		if (amdgpu_res_cleared(&cursor)) {
+			amdgpu_res_next(&cursor, cursor.size);
+			continue;
+		}
+
+		/* Never clear more than 256MiB at once to avoid timeouts */
+		size = min(cursor.size, 256ULL << 20);
+
+		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+					  1, ring, false, &size, &addr);
+		if (r)
+			goto err;
+
+		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+					&next, true, true);
+		if (r)
+			goto err;
+
+		dma_fence_put(f);
+		f = next;
+
+		amdgpu_res_next(&cursor, size);
+	}
+err:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return r;
+}
+
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e..b404d89d52e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -38,8 +38,6 @@ 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
 
-#define AMDGPU_POISON	0xd0bed0be
-
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
 
@@ -155,6 +153,9 @@  int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       uint64_t size, bool tmz,
 			       struct dma_resv *resv,
 			       struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c0c851409241..e494f5bf136a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -450,6 +450,7 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 	u64 vis_usage = 0, max_bytes, min_block_size;
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
@@ -501,6 +502,9 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -604,7 +608,7 @@  static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf809..8478522d7366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -53,6 +53,11 @@  static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
 	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
 }
 
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block);
+}
+
 static inline struct amdgpu_vram_mgr_resource *
 to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
 {