Message ID | 20231207151142.929349-2-Arunpravin.PaneerSelvam@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] drm/buddy: Implement tracking clear page feature | expand |
On Thu, Dec 7, 2023 at 10:12 AM Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> wrote: > > Add clear page support in vram memory region. > > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +++-- > .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25 ++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 50 +++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 +++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 5 ++ > 6 files changed, 105 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index cef920a93924..bc4ea87f8b5e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -39,6 +39,7 @@ > #include "amdgpu.h" > #include "amdgpu_trace.h" > #include "amdgpu_amdkfd.h" > +#include "amdgpu_vram_mgr.h" > > /** > * DOC: amdgpu_object > @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev, > > if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && > bo->tbo.resource->mem_type == TTM_PL_VRAM) { > - struct dma_fence *fence; > + struct dma_fence *fence = NULL; > > - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); > + r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true); > if (unlikely(r)) > goto fail_unreserve; > > - dma_resv_add_fence(bo->tbo.base.resv, fence, > - DMA_RESV_USAGE_KERNEL); > - dma_fence_put(fence); > + if (fence) { > + dma_resv_add_fence(bo->tbo.base.resv, fence, > + DMA_RESV_USAGE_KERNEL); > + dma_fence_put(fence); > + } > } > if (!bp->resv) > amdgpu_bo_unreserve(bo); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > index 381101d2bf05..50fcd86e1033 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) > } > } > > +/** > + * amdgpu_res_cleared - check if blocks are cleared > + * > + * @cur: the cursor to extract the block > + * > + * Check if the @cur block is cleared > + */ > +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) > +{ > + struct drm_buddy_block *block; > + > + switch (cur->mem_type) { > + case TTM_PL_VRAM: > + block = cur->node; > + > + if (!amdgpu_vram_mgr_is_cleared(block)) > + return false; > + break; > + default: > + return false; > + } > + > + return true; > +} > + > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 05991c5c8ddb..6d7514e8f40c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, > return 0; > } > > +int amdgpu_clear_buffer(struct amdgpu_bo *bo, amdgpu_ttm_clear_buffer() for naming consistency. 
Alex > + struct dma_resv *resv, > + struct dma_fence **fence, > + bool delayed) > +{ > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > + struct amdgpu_res_cursor cursor; > + struct dma_fence *f = NULL; > + u64 addr; > + int r; > + > + if (!adev->mman.buffer_funcs_enabled) > + return -EINVAL; > + > + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); > + > + mutex_lock(&adev->mman.gtt_window_lock); > + while (cursor.remaining) { > + struct dma_fence *next = NULL; > + u64 size; > + > + /* Never clear more than 256MiB at once to avoid timeouts */ > + size = min(cursor.size, 256ULL << 20); > + > + if (!amdgpu_res_cleared(&cursor)) { > + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, > + 1, ring, false, &size, &addr); > + if (r) > + goto err; > + > + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, > + &next, true, delayed); > + if (r) > + goto err; > + } > + dma_fence_put(f); > + f = next; > + > + amdgpu_res_next(&cursor, size); > + } > +err: > + mutex_unlock(&adev->mman.gtt_window_lock); > + if (fence) > + *fence = dma_fence_get(f); > + dma_fence_put(f); > + > + return r; > +} > + > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint32_t src_data, > struct dma_resv *resv, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index 65ec82141a8e..838251166883 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > uint64_t size, bool tmz, > struct dma_resv *resv, > struct dma_fence **f); > +int amdgpu_clear_buffer(struct amdgpu_bo *bo, > + struct dma_resv *resv, > + struct dma_fence **fence, > + bool delayed); > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint32_t src_data, > struct dma_resv *resv, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index d0e199cc8f17..ff74c324b5b5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > { > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > struct amdgpu_device *adev = to_amdgpu_device(mgr); > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); > u64 vis_usage = 0, max_bytes, min_block_size; > struct amdgpu_vram_mgr_resource *vres; > u64 size, remaining_size, lpfn, fpfn; > @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > if (place->flags & TTM_PL_FLAG_CONTIGUOUS) > vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) > + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; > + > if (fpfn || lpfn != mgr->mm.size) > /* Allocate blocks in desired range */ > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > struct amdgpu_device *adev = to_amdgpu_device(mgr); > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo); > struct drm_buddy *mm = &mgr->mm; > + struct dma_fence *fence = NULL; > struct drm_buddy_block *block; > uint64_t vis_usage = 0; > > @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > > amdgpu_vram_mgr_do_reserve(man); > > - drm_buddy_free_list(mm, &vres->blocks, 0); > + /* Clear all the blocks in free path */ > + if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) { > + vres->flags |= DRM_BUDDY_CLEARED; > + dma_fence_put(fence); > + } > + > + drm_buddy_free_list(mm, &vres->blocks, vres->flags); > mutex_unlock(&mgr->lock); > > atomic64_sub(vis_usage, &mgr->vis_usage); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > index 0e04e42cf809..8478522d7366 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) > return (u64)PAGE_SIZE << drm_buddy_block_order(block); > } > > +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) > +{ > + return drm_buddy_block_is_clear(block); > +} > + > static inline struct amdgpu_vram_mgr_resource * > to_amdgpu_vram_mgr_resource(struct ttm_resource *res) > { > -- > 2.25.1 >
Am 07.12.23 um 16:11 schrieb Arunpravin Paneer Selvam: > Add clear page support in vram memory region. The first patch looks good, but this here needs quite some work. > > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +++-- > .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25 ++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 50 +++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 +++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 5 ++ > 6 files changed, 105 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index cef920a93924..bc4ea87f8b5e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -39,6 +39,7 @@ > #include "amdgpu.h" > #include "amdgpu_trace.h" > #include "amdgpu_amdkfd.h" > +#include "amdgpu_vram_mgr.h" > > /** > * DOC: amdgpu_object > @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev, > > if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && > bo->tbo.resource->mem_type == TTM_PL_VRAM) { > - struct dma_fence *fence; > + struct dma_fence *fence = NULL; > > - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); > + r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true); > if (unlikely(r)) > goto fail_unreserve; > > - dma_resv_add_fence(bo->tbo.base.resv, fence, > - DMA_RESV_USAGE_KERNEL); > - dma_fence_put(fence); > + if (fence) { > + dma_resv_add_fence(bo->tbo.base.resv, fence, > + DMA_RESV_USAGE_KERNEL); > + dma_fence_put(fence); > + } > } > if (!bp->resv) > amdgpu_bo_unreserve(bo); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > index 381101d2bf05..50fcd86e1033 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) > } > } > > +/** > + * amdgpu_res_cleared - check if blocks are cleared > + * > + * @cur: the cursor to extract the block > + * > + * Check if the @cur block is cleared > + */ > +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) > +{ > + struct drm_buddy_block *block; > + > + switch (cur->mem_type) { > + case TTM_PL_VRAM: > + block = cur->node; > + > + if (!amdgpu_vram_mgr_is_cleared(block)) > + return false; > + break; > + default: > + return false; > + } > + > + return true; > +} > + > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 05991c5c8ddb..6d7514e8f40c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, > return 0; > } > > +int amdgpu_clear_buffer(struct amdgpu_bo *bo, > + struct dma_resv *resv, > + struct dma_fence **fence, > + bool delayed) Drop the delayed parameter, that doesn't make any sense here. And as Alex said please use an amdgpu_ttm_ prefix for the function name. > +{ > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > + struct amdgpu_res_cursor cursor; > + struct dma_fence *f = NULL; > + u64 addr; > + int r; > + > + if (!adev->mman.buffer_funcs_enabled) > + return -EINVAL; > + > + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); > + > + mutex_lock(&adev->mman.gtt_window_lock); > + while (cursor.remaining) { > + struct dma_fence *next = NULL; > + u64 size; > + > + /* Never clear more than 256MiB at once to avoid timeouts */ > + size = min(cursor.size, 256ULL << 20); > + > + if (!amdgpu_res_cleared(&cursor)) { This needs to come before the min(cursor.size....) 
directly above. I suggest a handling like this: if (amdgpu_res_cleared(&cursor)) { amdgpu_res_next(&cursor, cursor.size); continue; } size = min(.... > + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, > + 1, ring, false, &size, &addr); > + if (r) > + goto err; > + > + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, > + &next, true, delayed); > + if (r) > + goto err; > + } > + dma_fence_put(f); > + f = next; > + > + amdgpu_res_next(&cursor, size); > + } > +err: > + mutex_unlock(&adev->mman.gtt_window_lock); > + if (fence) > + *fence = dma_fence_get(f); > + dma_fence_put(f); > + > + return r; > +} > + > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint32_t src_data, > struct dma_resv *resv, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index 65ec82141a8e..838251166883 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > uint64_t size, bool tmz, > struct dma_resv *resv, > struct dma_fence **f); > +int amdgpu_clear_buffer(struct amdgpu_bo *bo, > + struct dma_resv *resv, > + struct dma_fence **fence, > + bool delayed); > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint32_t src_data, > struct dma_resv *resv, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index d0e199cc8f17..ff74c324b5b5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > { > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > struct amdgpu_device *adev = to_amdgpu_device(mgr); > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); > u64 vis_usage = 0, max_bytes, min_block_size; > struct amdgpu_vram_mgr_resource *vres; > u64 size, remaining_size, lpfn, fpfn; > @@ -486,6 +487,9 @@ static int 
amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > if (place->flags & TTM_PL_FLAG_CONTIGUOUS) > vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) > + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; > + > if (fpfn || lpfn != mgr->mm.size) > /* Allocate blocks in desired range */ > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > struct amdgpu_device *adev = to_amdgpu_device(mgr); > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo); > struct drm_buddy *mm = &mgr->mm; > + struct dma_fence *fence = NULL; > struct drm_buddy_block *block; > uint64_t vis_usage = 0; > > @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > > amdgpu_vram_mgr_do_reserve(man); > > - drm_buddy_free_list(mm, &vres->blocks, 0); > + /* Clear all the blocks in free path */ > + if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) { > + vres->flags |= DRM_BUDDY_CLEARED; > + dma_fence_put(fence); > + } > + That's a pretty clear no-go. This is the backend and CS is done from the front end. E.g. can't properly wait for the fence for example. Instead use the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE flag for this. IIRC we already always set this flag when ras is enabled, just make it mandatory for now. 
> + drm_buddy_free_list(mm, &vres->blocks, vres->flags); > mutex_unlock(&mgr->lock); > > atomic64_sub(vis_usage, &mgr->vis_usage); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > index 0e04e42cf809..8478522d7366 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) > return (u64)PAGE_SIZE << drm_buddy_block_order(block); > } > > +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) > +{ > + return drm_buddy_block_is_clear(block); > +} > + You also need a functionality which resets all cleared blocks to uncleared after suspend/resume. No idea how to do this, maybe Alex knows of hand. Regards, Christian. > static inline struct amdgpu_vram_mgr_resource * > to_amdgpu_vram_mgr_resource(struct ttm_resource *res) > {
On Fri, Dec 8, 2023 at 5:07 AM Christian König <christian.koenig@amd.com> wrote: > > Am 07.12.23 um 16:11 schrieb Arunpravin Paneer Selvam: > > Add clear page support in vram memory region. > > The first patch looks good, but this here needs quite some work. > > > > > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +++-- > > .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25 ++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 50 +++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 ++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 +++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 5 ++ > > 6 files changed, 105 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > index cef920a93924..bc4ea87f8b5e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > @@ -39,6 +39,7 @@ > > #include "amdgpu.h" > > #include "amdgpu_trace.h" > > #include "amdgpu_amdkfd.h" > > +#include "amdgpu_vram_mgr.h" > > > > /** > > * DOC: amdgpu_object > > @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev, > > > > if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && > > bo->tbo.resource->mem_type == TTM_PL_VRAM) { > > - struct dma_fence *fence; > > + struct dma_fence *fence = NULL; > > > > - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); > > + r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true); > > if (unlikely(r)) > > goto fail_unreserve; > > > > - dma_resv_add_fence(bo->tbo.base.resv, fence, > > - DMA_RESV_USAGE_KERNEL); > > - dma_fence_put(fence); > > + if (fence) { > > + dma_resv_add_fence(bo->tbo.base.resv, fence, > > + DMA_RESV_USAGE_KERNEL); > > + dma_fence_put(fence); > > + } > > } > > if (!bp->resv) > > amdgpu_bo_unreserve(bo); > > diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > > index 381101d2bf05..50fcd86e1033 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h > > @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) > > } > > } > > > > +/** > > + * amdgpu_res_cleared - check if blocks are cleared > > + * > > + * @cur: the cursor to extract the block > > + * > > + * Check if the @cur block is cleared > > + */ > > +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) > > +{ > > + struct drm_buddy_block *block; > > + > > + switch (cur->mem_type) { > > + case TTM_PL_VRAM: > > + block = cur->node; > > + > > + if (!amdgpu_vram_mgr_is_cleared(block)) > > + return false; > > + break; > > + default: > > + return false; > > + } > > + > > + return true; > > +} > > + > > #endif > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index 05991c5c8ddb..6d7514e8f40c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, > > return 0; > > } > > > > +int amdgpu_clear_buffer(struct amdgpu_bo *bo, > > + struct dma_resv *resv, > > + struct dma_fence **fence, > > + bool delayed) > > Drop the delayed parameter, that doesn't make any sense here. > > And as Alex said please use an amdgpu_ttm_ prefix for the function name. 
> > > +{ > > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > > + struct amdgpu_res_cursor cursor; > > + struct dma_fence *f = NULL; > > + u64 addr; > > + int r; > > + > > + if (!adev->mman.buffer_funcs_enabled) > > + return -EINVAL; > > + > > + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); > > + > > + mutex_lock(&adev->mman.gtt_window_lock); > > + while (cursor.remaining) { > > + struct dma_fence *next = NULL; > > + u64 size; > > + > > + /* Never clear more than 256MiB at once to avoid timeouts */ > > + size = min(cursor.size, 256ULL << 20); > > + > > + if (!amdgpu_res_cleared(&cursor)) { > > This needs to come before the min(cursor.size....) directly above. I > suggest a handling like this: > > if (amdgpu_res_cleared(&cursor)) { > amdgpu_res_next(&cursor, cursor.size); > continue; > } > > size = min(.... > > > + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, > > + 1, ring, false, &size, &addr); > > + if (r) > > + goto err; > > + > > + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, > > + &next, true, delayed); > > + if (r) > > + goto err; > > + } > > + dma_fence_put(f); > > + f = next; > > + > > + amdgpu_res_next(&cursor, size); > > + } > > +err: > > + mutex_unlock(&adev->mman.gtt_window_lock); > > + if (fence) > > + *fence = dma_fence_get(f); > > + dma_fence_put(f); > > + > > + return r; > > +} > > + > > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > > uint32_t src_data, > > struct dma_resv *resv, > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > index 65ec82141a8e..838251166883 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > > uint64_t size, bool tmz, > > struct dma_resv *resv, > > struct dma_fence **f); > > +int amdgpu_clear_buffer(struct amdgpu_bo 
*bo, > > + struct dma_resv *resv, > > + struct dma_fence **fence, > > + bool delayed); > > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > > uint32_t src_data, > > struct dma_resv *resv, > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > > index d0e199cc8f17..ff74c324b5b5 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > > @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > > { > > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > > struct amdgpu_device *adev = to_amdgpu_device(mgr); > > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); > > u64 vis_usage = 0, max_bytes, min_block_size; > > struct amdgpu_vram_mgr_resource *vres; > > u64 size, remaining_size, lpfn, fpfn; > > @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > > if (place->flags & TTM_PL_FLAG_CONTIGUOUS) > > vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; > > > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) > > + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; > > + > > if (fpfn || lpfn != mgr->mm.size) > > /* Allocate blocks in desired range */ > > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > > @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > > struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); > > struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); > > struct amdgpu_device *adev = to_amdgpu_device(mgr); > > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo); > > struct drm_buddy *mm = &mgr->mm; > > + struct dma_fence *fence = NULL; > > struct drm_buddy_block *block; > > uint64_t vis_usage = 0; > > > > @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, > > > > amdgpu_vram_mgr_do_reserve(man); > > > > - drm_buddy_free_list(mm, &vres->blocks, 0); > > + /* Clear all the blocks in free path */ > > + if 
(!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) { > > + vres->flags |= DRM_BUDDY_CLEARED; > > + dma_fence_put(fence); > > + } > > + > > That's a pretty clear no-go. This is the backend and CS is done from the > front end. E.g. can't properly wait for the fence for example. > > Instead use the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE flag for this. > > IIRC we already always set this flag when ras is enabled, just make it > mandatory for now. > > > + drm_buddy_free_list(mm, &vres->blocks, vres->flags); > > mutex_unlock(&mgr->lock); > > > > atomic64_sub(vis_usage, &mgr->vis_usage); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > > index 0e04e42cf809..8478522d7366 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h > > @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) > > return (u64)PAGE_SIZE << drm_buddy_block_order(block); > > } > > > > +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) > > +{ > > + return drm_buddy_block_is_clear(block); > > +} > > + > > You also need a functionality which resets all cleared blocks to > uncleared after suspend/resume. > > No idea how to do this, maybe Alex knows of hand. Since the buffers are cleared on creation, is there actually anything to do? Alex > > Regards, > Christian. > > > static inline struct amdgpu_vram_mgr_resource * > > to_amdgpu_vram_mgr_resource(struct ttm_resource *res) > > { >
Am 08.12.23 um 20:53 schrieb Alex Deucher: > [SNIP] >> You also need a functionality which resets all cleared blocks to >> uncleared after suspend/resume. >> >> No idea how to do this, maybe Alex knows of hand. > Since the buffers are cleared on creation, is there actually anything to do? Well exactly that's the problem, the buffers are no longer always cleared on creation with this patch. Instead we clear on free, track which areas are cleared and clear only the ones which aren't cleared yet on creation. So some cases need special handling. E.g. when the engine is not initialized yet or suspend/resume. In theory after a suspend/resume cycle the VRAM is cleared to zeros, but in practice that's not always true. Christian. > > Alex
On Mon, Dec 11, 2023 at 4:50 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > Am 08.12.23 um 20:53 schrieb Alex Deucher: > > [SNIP] > > You also need a functionality which resets all cleared blocks to > uncleared after suspend/resume. > > No idea how to do this, maybe Alex knows of hand. > > Since the buffers are cleared on creation, is there actually anything to do? > > > Well exactly that's the problem, the buffers are no longer always cleared on creation with this patch. > > Instead we clear on free, track which areas are cleared and clear only the ones which aren't cleared yet on creation. > > So some cases need special handling. E.g. when the engine is not initialized yet or suspend/resume. > > In theory after a suspend/resume cycle the VRAM is cleared to zeros, but in practice that's not always true. The vbios asic_init table will clear vram on boards with RAS, but not on others. Alex
On 2023-12-11 04:50, Christian König wrote: > Am 08.12.23 um 20:53 schrieb Alex Deucher: >> [SNIP] >>> You also need a functionality which resets all cleared blocks to >>> uncleared after suspend/resume. >>> >>> No idea how to do this, maybe Alex knows of hand. >> Since the buffers are cleared on creation, is there actually anything to do? > > Well exactly that's the problem, the buffers are no longer always > cleared on creation with this patch. > > Instead we clear on free, track which areas are cleared and clear only > the ones which aren't cleared yet on creation. The code I added for clearing-on-free a long time ago, does not clear to 0, but to a non-0 poison value. That was meant to make it easier to catch applications incorrectly relying on 0-initialized memory. Is that being changed? I didn't see it in this patch series. Regards, Felix > > So some cases need special handling. E.g. when the engine is not > initialized yet or suspend/resume. > > In theory after a suspend/resume cycle the VRAM is cleared to zeros, > but in practice that's not always true. > > Christian. > >> Alex
Am 12.12.23 um 00:32 schrieb Felix Kuehling: > > On 2023-12-11 04:50, Christian König wrote: >> Am 08.12.23 um 20:53 schrieb Alex Deucher: >>> [SNIP] >>>> You also need a functionality which resets all cleared blocks to >>>> uncleared after suspend/resume. >>>> >>>> No idea how to do this, maybe Alex knows of hand. >>> Since the buffers are cleared on creation, is there actually >>> anything to do? >> >> Well exactly that's the problem, the buffers are no longer always >> cleared on creation with this patch. >> >> Instead we clear on free, track which areas are cleared and clear >> only the ones which aren't cleared yet on creation. > > The code I added for clearing-on-free a long time ago, does not clear > to 0, but to a non-0 poison value. That was meant to make it easier to > catch applications incorrectly relying on 0-initialized memory. Is > that being changed? I didn't see it in this patch series. Yeah, Arun stumbled over that as well. Any objections that we fill with zeros instead or is that poison value something necessary for debugging? Regards, Christian. > > Regards, > Felix > > >> >> So some cases need special handling. E.g. when the engine is not >> initialized yet or suspend/resume. >> >> In theory after a suspend/resume cycle the VRAM is cleared to zeros, >> but in practice that's not always true. >> >> Christian. >> >>> Alex
On 2023-12-13 9:20, Christian König wrote: > Am 12.12.23 um 00:32 schrieb Felix Kuehling: >> >> On 2023-12-11 04:50, Christian König wrote: >>> Am 08.12.23 um 20:53 schrieb Alex Deucher: >>>> [SNIP] >>>>> You also need a functionality which resets all cleared blocks to >>>>> uncleared after suspend/resume. >>>>> >>>>> No idea how to do this, maybe Alex knows of hand. >>>> Since the buffers are cleared on creation, is there actually >>>> anything to do? >>> >>> Well exactly that's the problem, the buffers are no longer always >>> cleared on creation with this patch. >>> >>> Instead we clear on free, track which areas are cleared and clear >>> only the ones which aren't cleared yet on creation. >> >> The code I added for clearing-on-free a long time ago, does not clear >> to 0, but to a non-0 poison value. That was meant to make it easier >> to catch applications incorrectly relying on 0-initialized memory. Is >> that being changed? I didn't see it in this patch series. > > Yeah, Arun stumbled over that as well. Any objections that we fill > with zeros instead or is that poison value something necessary for > debugging? I don't think it's strictly necessary. But it may encourage sloppy user mode programming to rely on 0-initialized memory that ends up breaking in corner cases or on older kernels. That said, I see that this patch series adds clearing of memory in the VRAM manager, but it doesn't remove the clearing for AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE in amdgpu_bo_release_notify and amdgpu_move_blit. This will lead to duplicate work. I'm also not sure how the clearing added in this patch series will affect free-latency observed in user mode. Will this be synchronous and cause the user mode thread to stall while the memory is being cleared? Regards, Felix > > Regards, > Christian. > >> >> Regards, >> Felix >> >> >>> >>> So some cases need special handling. E.g. when the engine is not >>> initialized yet or suspend/resume. 
>>> >>> In theory after a suspend/resume cycle the VRAM is cleared to zeros, >>> but in practice that's not always true. >>> >>> Christian. >>> >>>> Alex >
Am 13.12.23 um 16:39 schrieb Felix Kuehling: > On 2023-12-13 9:20, Christian König wrote: >> Am 12.12.23 um 00:32 schrieb Felix Kuehling: >>> >>> On 2023-12-11 04:50, Christian König wrote: >>>> Am 08.12.23 um 20:53 schrieb Alex Deucher: >>>>> [SNIP] >>>>>> You also need a functionality which resets all cleared blocks to >>>>>> uncleared after suspend/resume. >>>>>> >>>>>> No idea how to do this, maybe Alex knows of hand. >>>>> Since the buffers are cleared on creation, is there actually >>>>> anything to do? >>>> >>>> Well exactly that's the problem, the buffers are no longer always >>>> cleared on creation with this patch. >>>> >>>> Instead we clear on free, track which areas are cleared and clear >>>> only the ones which aren't cleared yet on creation. >>> >>> The code I added for clearing-on-free a long time ago, does not >>> clear to 0, but to a non-0 poison value. That was meant to make it >>> easier to catch applications incorrectly relying on 0-initialized >>> memory. Is that being changed? I didn't see it in this patch series. >> >> Yeah, Arun stumbled over that as well. Any objections that we fill >> with zeros instead or is that poison value something necessary for >> debugging? > > I don't think it's strictly necessary. But it may encourage sloppy > user mode programming to rely on 0-initialized memory that ends up > breaking in corner cases or on older kernels. > > That said, I see that this patch series adds clearing of memory in the > VRAM manager, but it doesn't remove the clearing for > AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE in amdgpu_bo_release_notify and > amdgpu_move_blit. This will lead to duplicate work. > > I'm also not sure how the clearing added in this patch series will > affect free-latency observed in user mode. Will this be synchronous > and cause the user mode thread to stall while the memory is being > cleared? Yeah, that's not fully working at the moment. 
I already pointed out as well that Arun should remove the clearing in the VRAM manager. Regards, Christian. > > Regards, > Felix > > >> >> Regards, >> Christian. >> >>> >>> Regards, >>> Felix >>> >>> >>>> >>>> So some cases need special handling. E.g. when the engine is not >>>> initialized yet or suspend/resume. >>>> >>>> In theory after a suspend/resume cycle the VRAM is cleared to >>>> zeros, but in practice that's not always true. >>>> >>>> Christian. >>>> >>>>> Alex >>
On 2023-12-13 16:39, Felix Kuehling wrote: > On 2023-12-13 9:20, Christian König wrote: >> Am 12.12.23 um 00:32 schrieb Felix Kuehling: >>> On 2023-12-11 04:50, Christian König wrote: >>>> Am 08.12.23 um 20:53 schrieb Alex Deucher: >>>>> [SNIP] >>>>>> You also need a functionality which resets all cleared blocks to >>>>>> uncleared after suspend/resume. >>>>>> >>>>>> No idea how to do this, maybe Alex knows of hand. >>>>> Since the buffers are cleared on creation, is there actually anything to do? >>>> >>>> Well exactly that's the problem, the buffers are no longer always cleared on creation with this patch. >>>> >>>> Instead we clear on free, track which areas are cleared and clear only the ones which aren't cleared yet on creation. >>> >>> The code I added for clearing-on-free a long time ago, does not clear to 0, but to a non-0 poison value. That was meant to make it easier to catch applications incorrectly relying on 0-initialized memory. Is that being changed? I didn't see it in this patch series. >> >> Yeah, Arun stumbled over that as well. Any objections that we fill with zeros instead or is that poison value something necessary for debugging? > > I don't think it's strictly necessary. But it may encourage sloppy user mode programming to rely on 0-initialized memory that ends up breaking in corner cases or on older kernels. From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction.
Am 13.12.23 um 16:46 schrieb Michel Dänzer:
> From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction.
Well, please take a look at the MAP_UNINITIALIZED flag for mmap(). We
even have the functionality to return uninitialized system memory when
the kernel compile option for this is set, since this is an important
optimization for many use cases.
Regards,
Christian.
On 2023-12-14 11:31, Christian König wrote: > Am 13.12.23 um 16:46 schrieb Michel Dänzer: >> From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction. > > Well please take a look at the MAP_UNINITIALIZED flag for mmap(). MAP_UNINITIALIZED (since Linux 2.6.33) Don't clear anonymous pages. This flag is intended to improve performance on embedded devices. This flag is honored only if the kernel was configured with the CONFIG_MMAP_ALLOW_UNINITIAL‐ IZED option. Because of the security implications, that option is normally enabled only on embedded devices (i.e., devices where one has complete control of the contents of user memory). > We even have the functionality to return uninitialized system memory when the kernel compile option for this is set From mm/Kconfig: config MMAP_ALLOW_UNINITIALIZED bool "Allow mmapped anonymous memory to be uninitialized" depends on EXPERT && !MMU default n help Normally, and according to the Linux spec, anonymous memory obtained from mmap() has its contents cleared before it is passed to userspace. Enabling this config option allows you to request that mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus providing a huge performance boost. If this option is not enabled, then the flag will be ignored. This is taken advantage of by uClibc's malloc(), and also by ELF-FDPIC binfmt's brk and stack allocator. Because of the obvious security issues, this option should only be enabled on embedded devices where you control what is run in userspace. Since that isn't generally a problem on no-MMU systems, it is normally safe to say Y here. See Documentation/admin-guide/mm/nommu-mmap.rst for more information. Both looks consistent with what I wrote. > since this is an important optimization for many use cases. Per above, it's available only on platforms without MMU.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cef920a93924..bc4ea87f8b5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -39,6 +39,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_vram_mgr.h" /** * DOC: amdgpu_object @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.resource->mem_type == TTM_PL_VRAM) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); + r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true); if (unlikely(r)) goto fail_unreserve; - dma_resv_add_fence(bo->tbo.base.resv, fence, - DMA_RESV_USAGE_KERNEL); - dma_fence_put(fence); + if (fence) { + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } } if (!bp->resv) amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 381101d2bf05..50fcd86e1033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) } } +/** + * amdgpu_res_cleared - check if blocks are cleared + * + * @cur: the cursor to extract the block + * + * Check if the @cur block is cleared + */ +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) +{ + struct drm_buddy_block *block; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + if (!amdgpu_vram_mgr_is_cleared(block)) + return false; + break; + default: + return false; + } + + return true; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 05991c5c8ddb..6d7514e8f40c 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, return 0; } +int amdgpu_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence, + bool delayed) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_res_cursor cursor; + struct dma_fence *f = NULL; + u64 addr; + int r; + + if (!adev->mman.buffer_funcs_enabled) + return -EINVAL; + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + + mutex_lock(&adev->mman.gtt_window_lock); + while (cursor.remaining) { + struct dma_fence *next = NULL; + u64 size; + + /* Never clear more than 256MiB at once to avoid timeouts */ + size = min(cursor.size, 256ULL << 20); + + if (!amdgpu_res_cleared(&cursor)) { + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, + 1, ring, false, &size, &addr); + if (r) + goto err; + + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, + &next, true, delayed); + if (r) + goto err; + } + dma_fence_put(f); + f = next; + + amdgpu_res_next(&cursor, size); + } +err: + mutex_unlock(&adev->mman.gtt_window_lock); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return r; +} + int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..838251166883 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); +int amdgpu_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence, + bool delayed); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, 
struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index d0e199cc8f17..ff74c324b5b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo); struct drm_buddy *mm = &mgr->mm; + struct dma_fence *fence = NULL; struct drm_buddy_block *block; uint64_t vis_usage = 0; @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, 0); + /* Clear all the blocks in free path */ + if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) { + vres->flags |= DRM_BUDDY_CLEARED; + dma_fence_put(fence); + } + + drm_buddy_free_list(mm, &vres->blocks, vres->flags); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 0e04e42cf809..8478522d7366 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) return (u64)PAGE_SIZE << drm_buddy_block_order(block); } +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block); +} + static inline struct amdgpu_vram_mgr_resource * to_amdgpu_vram_mgr_resource(struct ttm_resource *res) {
Add clear page support in vram memory region. Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +++-- .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 50 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 14 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 5 ++ 6 files changed, 105 insertions(+), 6 deletions(-)