Message ID | 20240718103243.1241392-2-Arunpravin.PaneerSelvam@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v6,1/2] drm/buddy: Add start address support to trim function | expand |
Am 18.07.24 um 12:32 schrieb Arunpravin Paneer Selvam: > Add address alignment support to the DCC VRAM buffers. > > v2: > - adjust size based on the max_texture_channel_caches values > only for GFX12 DCC buffers. > - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only > for DCC buffers. > - roundup non power of two DCC buffer adjusted size to nearest > power of two number as the buddy allocator does not support non > power of two alignments. This applies only to the contiguous > DCC buffers. > > v3:(Alex) > - rewrite the max texture channel caches comparison code in an > algorithmic way to determine the alignment size. > > v4:(Alex) > - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c > and add a new gmc func callback for dcc alignment. If the callback > is non-NULL, call it to get the alignment, otherwise, use the default. > > v5:(Alex) > - Set the Alignment to a default value if the callback doesn't exist. > - Add the callback to amdgpu_gmc_funcs. > > v6: > - Fix checkpatch warning reported by Intel CI. > > v7:(Christian) > - remove the AMDGPU_GEM_CREATE_GFX12_DCC flag and keep a flag that > checks the BO pinning and for a specific hw generation. > > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> > Acked-by: Alex Deucher <alexander.deucher@amd.com> > Acked-by: Christian König <christian.koenig@amd.com> > Reviewed-by: Frank Min <Frank.Min@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 39 +++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 15 ++++++++ > 3 files changed, 58 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index febca3130497..654d0548a3f8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { > uint64_t addr, uint64_t *flags); > /* get the amount of memory used by the vbios for pre-OS console */ > unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); > + /* get the DCC buffer alignment */ > + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); > > enum amdgpu_memory_partition (*query_mem_partition_mode)( > struct amdgpu_device *adev); > @@ -363,6 +365,10 @@ struct amdgpu_gmc { > (adev)->gmc.gmc_funcs->override_vm_pte_flags \ > ((adev), (vm), (addr), (pte_flags)) > #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) > +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ > + typeof(_adev) (adev) = (_adev); \ > + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ > +}) > > /** > * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > index f91cc149d06c..ace9d61fc512 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -512,6 +512,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; > > remaining_size = (u64)vres->base.size; > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || > + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { I think you should move this check into gmc_v12_0_get_dcc_alignment. E.g. here you just check if adev->gmc.gmc_funcs->get_dcc_alignment is not NULL. Then call the function and if it returns a non zero value apply it. Regards, Christian. > + u64 adjust_size; > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + adjust_size = amdgpu_gmc_get_dcc_alignment(adev); > + remaining_size = roundup_pow_of_two(remaining_size + adjust_size); > + vres->flags |= DRM_BUDDY_TRIM_DISABLE; > + } > + } > > mutex_lock(&mgr->lock); > while (remaining_size) { > @@ -521,8 +532,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > min_block_size = mgr->default_page_size; > > size = remaining_size; > - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) > + > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || > + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) > + min_block_size = size; > + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && > + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) > min_block_size = (u64)pages_per_block << PAGE_SHIFT; > > BUG_ON(min_block_size < mm->chunk_size); > @@ -553,6 +569,25 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, > } > mutex_unlock(&mgr->lock); > > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && > + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || > + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { > + struct drm_buddy_block *dcc_block; > + u64 dcc_start, alignment; > + > + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); > + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); > + > + if (adev->gmc.gmc_funcs->get_dcc_alignment) { > + alignment = amdgpu_gmc_get_dcc_alignment(adev); > + /* Adjust the start address for DCC buffers only */ > + dcc_start = roundup(dcc_start, alignment); > + drm_buddy_block_trim(mm, &dcc_start, > + (u64)vres->base.size, > + &vres->blocks); > + } > + } > + > vres->base.start = 0; > size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), > vres->base.size); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > index fd3ac483760e..4259edcdec8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c > @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) > return 0; > } > > +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) > +{ > + u64 max_tex_channel_caches, alignment; > + > + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; > + if (is_power_of_2(max_tex_channel_caches)) > + alignment = (max_tex_channel_caches / SZ_4) * max_tex_channel_caches; > + else > + alignment = roundup_pow_of_two(max_tex_channel_caches) * > + max_tex_channel_caches; > + > + return (u64)alignment * SZ_1K; > +} > + > static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { > .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, > .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, > @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { > .get_vm_pde = gmc_v12_0_get_vm_pde, > .get_vm_pte = gmc_v12_0_get_vm_pte, > .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, > + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, > }; > > static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..654d0548a3f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + u64 (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(_adev) ({ \ + typeof(_adev) (adev) = (_adev); \ + ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..ace9d61fc512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -512,6 +512,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { + u64 adjust_size; + + if (adev->gmc.gmc_funcs->get_dcc_alignment) { + adjust_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = roundup_pow_of_two(remaining_size + adjust_size); + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +532,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,25 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) { + struct drm_buddy_block *dcc_block; + u64 dcc_start, alignment; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + dcc_start = amdgpu_vram_mgr_block_start(dcc_block); + + if (adev->gmc.gmc_funcs->get_dcc_alignment) { + alignment = amdgpu_gmc_get_dcc_alignment(adev); + /* Adjust the start address for DCC buffers only */ + dcc_start = roundup(dcc_start, alignment); + drm_buddy_block_trim(mm, &dcc_start, + (u64)vres->base.size, + &vres->blocks); + } + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..4259edcdec8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + u64 max_tex_channel_caches, alignment; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (max_tex_channel_caches / SZ_4) * max_tex_channel_caches; + else + alignment = roundup_pow_of_two(max_tex_channel_caches) * + max_tex_channel_caches; + + return (u64)alignment * SZ_1K; +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)