diff mbox series

[v5,2/2] drm/amdgpu: Add address alignment support to DCC buffers

Message ID 20240716152104.377039-2-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Headers show
Series [v5,1/2] drm/buddy: Add start address support to trim function | expand

Commit Message

Paneer Selvam, Arunpravin July 16, 2024, 3:21 p.m. UTC
Add address alignment support to the DCC VRAM buffers.

v2:
  - adjust size based on the max_texture_channel_caches values
    only for GFX12 DCC buffers.
  - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
    for DCC buffers.
  - roundup non power of two DCC buffer adjusted size to nearest
    power of two number as the buddy allocator does not support non
    power of two alignments. This applies only to the contiguous
    DCC buffers.

v3:(Alex)
  - rewrite the max texture channel caches comparison code in an
    algorithmic way to determine the alignment size.

v4:(Alex)
  - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c
    and add a new gmc func callback for dcc alignment. If the callback
    is non-NULL, call it to get the alignment, otherwise, use the default.

v5:(Alex)
  - Set the Alignment to a default value if the callback doesn't exist.
  - Add the callback to amdgpu_gmc_funcs.

v6:
  - Fix checkpatch error reported by Intel CI.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Frank Min <Frank.Min@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 ++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

Comments

Marek Olšák July 17, 2024, 3:44 a.m. UTC | #1
AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, and
almost all of them are not displayable. Shouldn't we use a different way to
indicate that we need a non-power-of-two alignment, such as looking at the
alignment field directly?

Marek

On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam <
Arunpravin.PaneerSelvam@amd.com> wrote:

> Add address alignment support to the DCC VRAM buffers.
>
> v2:
>   - adjust size based on the max_texture_channel_caches values
>     only for GFX12 DCC buffers.
>   - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>     for DCC buffers.
>   - roundup non power of two DCC buffer adjusted size to nearest
>     power of two number as the buddy allocator does not support non
>     power of two alignments. This applies only to the contiguous
>     DCC buffers.
>
> v3:(Alex)
>   - rewrite the max texture channel caches comparison code in an
>     algorithmic way to determine the alignment size.
>
> v4:(Alex)
>   - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c
>     and add a new gmc func callback for dcc alignment. If the callback
>     is non-NULL, call it to get the alignment, otherwise, use the default.
>
> v5:(Alex)
>   - Set the Alignment to a default value if the callback doesn't exist.
>   - Add the callback to amdgpu_gmc_funcs.
>
> v6:
>   - Fix checkpatch error reported by Intel CI.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>
> Acked-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Frank Min <Frank.Min@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36 ++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>  3 files changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index febca3130497..654d0548a3f8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>                                       uint64_t addr, uint64_t *flags);
>         /* get the amount of memory used by the vbios for pre-OS console */
>         unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
> +       /* get the DCC buffer alignment */
> +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>
>         enum amdgpu_memory_partition (*query_mem_partition_mode)(
>                 struct amdgpu_device *adev);
> @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>         (adev)->gmc.gmc_funcs->override_vm_pte_flags                    \
>                 ((adev), (vm), (addr), (pte_flags))
>  #define amdgpu_gmc_get_vbios_fb_size(adev)
> (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
> +#define amdgpu_gmc_get_dcc_alignment(_adev) ({                 \
> +       typeof(_adev) (adev) = (_adev);                         \
> +       ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));     \
> +})
>
>  /**
>   * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through
> the BAR
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index f91cc149d06c..aa9dca12371c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
> ttm_resource_manager *man,
>                 vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
>         remaining_size = (u64)vres->base.size;
> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
> +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
> +               u64 adjust_size;
> +
> +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
> +                       adjust_size = amdgpu_gmc_get_dcc_alignment(adev);
> +                       remaining_size = roundup_pow_of_two(remaining_size
> + adjust_size);
> +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
> +               }
> +       }
>
>         mutex_lock(&mgr->lock);
>         while (remaining_size) {
> @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
> ttm_resource_manager *man,
>                         min_block_size = mgr->default_page_size;
>
>                 size = remaining_size;
> -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
> -                   !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
> +
> +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
> +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
> +                       min_block_size = size;
> +               else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
> +                        !(size & (((u64)pages_per_block << PAGE_SHIFT) -
> 1)))
>                         min_block_size = (u64)pages_per_block <<
> PAGE_SHIFT;
>
>                 BUG_ON(min_block_size < mm->chunk_size);
> @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
> ttm_resource_manager *man,
>         }
>         mutex_unlock(&mgr->lock);
>
> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
> +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
> +               struct drm_buddy_block *dcc_block;
> +               u64 dcc_start, alignment;
> +
> +               dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
> +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
> +
> +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
> +                       alignment = amdgpu_gmc_get_dcc_alignment(adev);
> +                       /* Adjust the start address for DCC buffers only */
> +                       dcc_start = roundup(dcc_start, alignment);
> +                       drm_buddy_block_trim(mm, &dcc_start,
> +                                            (u64)vres->base.size,
> +                                            &vres->blocks);
> +               }
> +       }
> +
>         vres->base.start = 0;
>         size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>                      vres->base.size);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index fd3ac483760e..4259edcdec8a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -542,6 +542,20 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct
> amdgpu_device *adev)
>         return 0;
>  }
>
> +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
> +{
> +       u64 max_tex_channel_caches, alignment;
> +
> +       max_tex_channel_caches =
> adev->gfx.config.max_texture_channel_caches;
> +       if (is_power_of_2(max_tex_channel_caches))
> +               alignment = (max_tex_channel_caches / SZ_4) *
> max_tex_channel_caches;
> +       else
> +               alignment = roundup_pow_of_two(max_tex_channel_caches) *
> +                               max_tex_channel_caches;
> +
> +       return (u64)alignment * SZ_1K;
> +}
> +
>  static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>         .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>         .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
> @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
> gmc_v12_0_gmc_funcs = {
>         .get_vm_pde = gmc_v12_0_get_vm_pde,
>         .get_vm_pte = gmc_v12_0_get_vm_pte,
>         .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
> +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>  };
>
>  static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
> --
> 2.25.1
>
>
Christian König July 17, 2024, 9:08 a.m. UTC | #2
Well that approach was discussed before and seemed to be to complicated.

But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad 
idea. This isn't anything userspace should need to specify in the first 
place.

All we need is a hardware workaround which kicks in all the time while 
pinning BOs for this specific hw generation and texture channel 
configuration.

Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible or 
specify why it is actually necessary?

Regards,
Christian.

Am 17.07.24 um 05:44 schrieb Marek Olšák:
> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
> and almost all of them are not displayable. Shouldn't we use a 
> different way to indicate that we need a non-power-of-two alignment, 
> such as looking at the alignment field directly?
>
> Marek
>
> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
> <Arunpravin.PaneerSelvam@amd.com> wrote:
>
>     Add address alignment support to the DCC VRAM buffers.
>
>     v2:
>       - adjust size based on the max_texture_channel_caches values
>         only for GFX12 DCC buffers.
>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>         for DCC buffers.
>       - roundup non power of two DCC buffer adjusted size to nearest
>         power of two number as the buddy allocator does not support non
>         power of two alignments. This applies only to the contiguous
>         DCC buffers.
>
>     v3:(Alex)
>       - rewrite the max texture channel caches comparison code in an
>         algorithmic way to determine the alignment size.
>
>     v4:(Alex)
>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to gmc_v12_0.c
>         and add a new gmc func callback for dcc alignment. If the callback
>         is non-NULL, call it to get the alignment, otherwise, use the
>     default.
>
>     v5:(Alex)
>       - Set the Alignment to a default value if the callback doesn't
>     exist.
>       - Add the callback to amdgpu_gmc_funcs.
>
>     v6:
>       - Fix checkpatch error reported by Intel CI.
>
>     Signed-off-by: Arunpravin Paneer Selvam
>     <Arunpravin.PaneerSelvam@amd.com>
>     Acked-by: Alex Deucher <alexander.deucher@amd.com>
>     Acked-by: Christian König <christian.koenig@amd.com>
>     Reviewed-by: Frank Min <Frank.Min@amd.com>
>     ---
>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>     ++++++++++++++++++--
>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>      3 files changed, 55 insertions(+), 2 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     index febca3130497..654d0548a3f8 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>                                           uint64_t addr, uint64_t *flags);
>             /* get the amount of memory used by the vbios for pre-OS
>     console */
>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
>     +       /* get the DCC buffer alignment */
>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>
>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>                     struct amdgpu_device *adev);
>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>     (adev)->gmc.gmc_funcs->override_vm_pte_flags             \
>                     ((adev), (vm), (addr), (pte_flags))
>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({      \
>     +       typeof(_adev) (adev) = (_adev);      \
>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>     +})
>
>      /**
>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>     through the BAR
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     index f91cc149d06c..aa9dca12371c 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
>             remaining_size = (u64)vres->base.size;
>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>     +               u64 adjust_size;
>     +
>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>     +                       adjust_size =
>     amdgpu_gmc_get_dcc_alignment(adev);
>     +                       remaining_size =
>     roundup_pow_of_two(remaining_size + adjust_size);
>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>     +               }
>     +       }
>
>             mutex_lock(&mgr->lock);
>             while (remaining_size) {
>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>                             min_block_size = mgr->default_page_size;
>
>                     size = remaining_size;
>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>     -                   !(size & (((u64)pages_per_block << PAGE_SHIFT)
>     - 1)))
>     +
>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>     +                       min_block_size = size;
>     +               else if ((size >= (u64)pages_per_block <<
>     PAGE_SHIFT) &&
>     +                        !(size & (((u64)pages_per_block <<
>     PAGE_SHIFT) - 1)))
>                             min_block_size = (u64)pages_per_block <<
>     PAGE_SHIFT;
>
>                     BUG_ON(min_block_size < mm->chunk_size);
>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>     ttm_resource_manager *man,
>             }
>             mutex_unlock(&mgr->lock);
>
>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>     +               struct drm_buddy_block *dcc_block;
>     +               u64 dcc_start, alignment;
>     +
>     +               dcc_block =
>     amdgpu_vram_mgr_first_block(&vres->blocks);
>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>     +
>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>     +                       alignment =
>     amdgpu_gmc_get_dcc_alignment(adev);
>     +                       /* Adjust the start address for DCC
>     buffers only */
>     +                       dcc_start = roundup(dcc_start, alignment);
>     +                       drm_buddy_block_trim(mm, &dcc_start,
>     + (u64)vres->base.size,
>     + &vres->blocks);
>     +               }
>     +       }
>     +
>             vres->base.start = 0;
>             size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>                          vres->base.size);
>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     index fd3ac483760e..4259edcdec8a 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>     @@ -542,6 +542,20 @@ static unsigned
>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>             return 0;
>      }
>
>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>     +{
>     +       u64 max_tex_channel_caches, alignment;
>     +
>     +       max_tex_channel_caches =
>     adev->gfx.config.max_texture_channel_caches;
>     +       if (is_power_of_2(max_tex_channel_caches))
>     +               alignment = (max_tex_channel_caches / SZ_4) *
>     max_tex_channel_caches;
>     +       else
>     +               alignment =
>     roundup_pow_of_two(max_tex_channel_caches) *
>     +                               max_tex_channel_caches;
>     +
>     +       return (u64)alignment * SZ_1K;
>     +}
>     +
>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>     gmc_v12_0_gmc_funcs = {
>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>      };
>
>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>     -- 
>     2.25.1
>
Paneer Selvam, Arunpravin July 17, 2024, 2:38 p.m. UTC | #3
Hi Christian,

Can we use the below combination flags to kick in hardware workaround 
while pinning BO's for this specific hw generation.

if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
}

Regards,
Arun.

On 7/17/2024 2:38 PM, Christian König wrote:
> Well that approach was discussed before and seemed to be to complicated.
>
> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a bad 
> idea. This isn't anything userspace should need to specify in the 
> first place.
>
> All we need is a hardware workaround which kicks in all the time while 
> pinning BOs for this specific hw generation and texture channel 
> configuration.
>
> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible 
> or specify why it is actually necessary?
>
> Regards,
> Christian.
>
> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
>> and almost all of them are not displayable. Shouldn't we use a 
>> different way to indicate that we need a non-power-of-two alignment, 
>> such as looking at the alignment field directly?
>>
>> Marek
>>
>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>
>>     Add address alignment support to the DCC VRAM buffers.
>>
>>     v2:
>>       - adjust size based on the max_texture_channel_caches values
>>         only for GFX12 DCC buffers.
>>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>>         for DCC buffers.
>>       - roundup non power of two DCC buffer adjusted size to nearest
>>         power of two number as the buddy allocator does not support non
>>         power of two alignments. This applies only to the contiguous
>>         DCC buffers.
>>
>>     v3:(Alex)
>>       - rewrite the max texture channel caches comparison code in an
>>         algorithmic way to determine the alignment size.
>>
>>     v4:(Alex)
>>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>>     gmc_v12_0.c
>>         and add a new gmc func callback for dcc alignment. If the
>>     callback
>>         is non-NULL, call it to get the alignment, otherwise, use the
>>     default.
>>
>>     v5:(Alex)
>>       - Set the Alignment to a default value if the callback doesn't
>>     exist.
>>       - Add the callback to amdgpu_gmc_funcs.
>>
>>     v6:
>>       - Fix checkpatch error reported by Intel CI.
>>
>>     Signed-off-by: Arunpravin Paneer Selvam
>>     <Arunpravin.PaneerSelvam@amd.com>
>>     Acked-by: Alex Deucher <alexander.deucher@amd.com>
>>     Acked-by: Christian König <christian.koenig@amd.com>
>>     Reviewed-by: Frank Min <Frank.Min@amd.com>
>>     ---
>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>>     ++++++++++++++++++--
>>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>>      3 files changed, 55 insertions(+), 2 deletions(-)
>>
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     index febca3130497..654d0548a3f8 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>>                                           uint64_t addr, uint64_t
>>     *flags);
>>             /* get the amount of memory used by the vbios for pre-OS
>>     console */
>>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>>     *adev);
>>     +       /* get the DCC buffer alignment */
>>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>
>>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>>                     struct amdgpu_device *adev);
>>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>>     (adev)->gmc.gmc_funcs->override_vm_pte_flags               \
>>                     ((adev), (vm), (addr), (pte_flags))
>>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({        \
>>     +       typeof(_adev) (adev) = (_adev);        \
>>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>>     +})
>>
>>      /**
>>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>>     through the BAR
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     index f91cc149d06c..aa9dca12371c 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>
>>             remaining_size = (u64)vres->base.size;
>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>     +               u64 adjust_size;
>>     +
>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>     +                       adjust_size =
>>     amdgpu_gmc_get_dcc_alignment(adev);
>>     +                       remaining_size =
>>     roundup_pow_of_two(remaining_size + adjust_size);
>>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>>     +               }
>>     +       }
>>
>>             mutex_lock(&mgr->lock);
>>             while (remaining_size) {
>>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>                             min_block_size = mgr->default_page_size;
>>
>>                     size = remaining_size;
>>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>>     -                   !(size & (((u64)pages_per_block <<
>>     PAGE_SHIFT) - 1)))
>>     +
>>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>>     +                       min_block_size = size;
>>     +               else if ((size >= (u64)pages_per_block <<
>>     PAGE_SHIFT) &&
>>     +                        !(size & (((u64)pages_per_block <<
>>     PAGE_SHIFT) - 1)))
>>                             min_block_size = (u64)pages_per_block <<
>>     PAGE_SHIFT;
>>
>>                     BUG_ON(min_block_size < mm->chunk_size);
>>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>>     ttm_resource_manager *man,
>>             }
>>             mutex_unlock(&mgr->lock);
>>
>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>     +               struct drm_buddy_block *dcc_block;
>>     +               u64 dcc_start, alignment;
>>     +
>>     +               dcc_block =
>>     amdgpu_vram_mgr_first_block(&vres->blocks);
>>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>>     +
>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>     +                       alignment =
>>     amdgpu_gmc_get_dcc_alignment(adev);
>>     +                       /* Adjust the start address for DCC
>>     buffers only */
>>     +                       dcc_start = roundup(dcc_start, alignment);
>>     +                       drm_buddy_block_trim(mm, &dcc_start,
>>     + (u64)vres->base.size,
>>     + &vres->blocks);
>>     +               }
>>     +       }
>>     +
>>             vres->base.start = 0;
>>             size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
>>                          vres->base.size);
>>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     index fd3ac483760e..4259edcdec8a 100644
>>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>     @@ -542,6 +542,20 @@ static unsigned
>>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>>             return 0;
>>      }
>>
>>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>>     +{
>>     +       u64 max_tex_channel_caches, alignment;
>>     +
>>     +       max_tex_channel_caches =
>>     adev->gfx.config.max_texture_channel_caches;
>>     +       if (is_power_of_2(max_tex_channel_caches))
>>     +               alignment = (max_tex_channel_caches / SZ_4) *
>>     max_tex_channel_caches;
>>     +       else
>>     +               alignment =
>>     roundup_pow_of_two(max_tex_channel_caches) *
>>     +                               max_tex_channel_caches;
>>     +
>>     +       return (u64)alignment * SZ_1K;
>>     +}
>>     +
>>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>>     gmc_v12_0_gmc_funcs = {
>>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>>      };
>>
>>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>>     -- 
>>     2.25.1
>>
>
Christian König July 17, 2024, 2:50 p.m. UTC | #4
As far as I know, yes.

Regards,
Christian.

Am 17.07.24 um 16:38 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
> Can we use the below combination flags to kick in hardware workaround 
> while pinning BO's for this specific hw generation.
>
> if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&
> (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
> amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {
> }
>
> Regards,
> Arun.
>
> On 7/17/2024 2:38 PM, Christian König wrote:
>> Well that approach was discussed before and seemed to be to complicated.
>>
>> But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a 
>> bad idea. This isn't anything userspace should need to specify in the 
>> first place.
>>
>> All we need is a hardware workaround which kicks in all the time 
>> while pinning BOs for this specific hw generation and texture channel 
>> configuration.
>>
>> Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if possible 
>> or specify why it is actually necessary?
>>
>> Regards,
>> Christian.
>>
>> Am 17.07.24 um 05:44 schrieb Marek Olšák:
>>> AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory allocations, 
>>> and almost all of them are not displayable. Shouldn't we use a 
>>> different way to indicate that we need a non-power-of-two alignment, 
>>> such as looking at the alignment field directly?
>>>
>>> Marek
>>>
>>> On Tue, Jul 16, 2024, 11:45 Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>
>>>     Add address alignment support to the DCC VRAM buffers.
>>>
>>>     v2:
>>>       - adjust size based on the max_texture_channel_caches values
>>>         only for GFX12 DCC buffers.
>>>       - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply change only
>>>         for DCC buffers.
>>>       - roundup non power of two DCC buffer adjusted size to nearest
>>>         power of two number as the buddy allocator does not support non
>>>         power of two alignments. This applies only to the contiguous
>>>         DCC buffers.
>>>
>>>     v3:(Alex)
>>>       - rewrite the max texture channel caches comparison code in an
>>>         algorithmic way to determine the alignment size.
>>>
>>>     v4:(Alex)
>>>       - Move the logic from amdgpu_vram_mgr_dcc_alignment() to
>>>     gmc_v12_0.c
>>>         and add a new gmc func callback for dcc alignment. If the
>>>     callback
>>>         is non-NULL, call it to get the alignment, otherwise, use
>>>     the default.
>>>
>>>     v5:(Alex)
>>>       - Set the Alignment to a default value if the callback doesn't
>>>     exist.
>>>       - Add the callback to amdgpu_gmc_funcs.
>>>
>>>     v6:
>>>       - Fix checkpatch error reported by Intel CI.
>>>
>>>     Signed-off-by: Arunpravin Paneer Selvam
>>>     <Arunpravin.PaneerSelvam@amd.com>
>>>     Acked-by: Alex Deucher <alexander.deucher@amd.com>
>>>     Acked-by: Christian König <christian.koenig@amd.com>
>>>     Reviewed-by: Frank Min <Frank.Min@amd.com>
>>>     ---
>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++
>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
>>>     ++++++++++++++++++--
>>>      drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15 ++++++++
>>>      3 files changed, 55 insertions(+), 2 deletions(-)
>>>
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     index febca3130497..654d0548a3f8 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>>>     @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
>>>                                           uint64_t addr, uint64_t
>>>     *flags);
>>>             /* get the amount of memory used by the vbios for pre-OS
>>>     console */
>>>             unsigned int (*get_vbios_fb_size)(struct amdgpu_device
>>>     *adev);
>>>     +       /* get the DCC buffer alignment */
>>>     +       u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
>>>
>>>             enum amdgpu_memory_partition (*query_mem_partition_mode)(
>>>                     struct amdgpu_device *adev);
>>>     @@ -363,6 +365,10 @@ struct amdgpu_gmc {
>>>     (adev)->gmc.gmc_funcs->override_vm_pte_flags                 \
>>>                     ((adev), (vm), (addr), (pte_flags))
>>>      #define amdgpu_gmc_get_vbios_fb_size(adev)
>>>     (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
>>>     +#define amdgpu_gmc_get_dcc_alignment(_adev) ({          \
>>>     +       typeof(_adev) (adev) = (_adev);          \
>>>     +  ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));    \
>>>     +})
>>>
>>>      /**
>>>       * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>>>     through the BAR
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     index f91cc149d06c..aa9dca12371c 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>     @@ -512,6 +512,16 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>                     vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>
>>>             remaining_size = (u64)vres->base.size;
>>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>>     +               u64 adjust_size;
>>>     +
>>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>>     +                       adjust_size =
>>>     amdgpu_gmc_get_dcc_alignment(adev);
>>>     +                       remaining_size =
>>>     roundup_pow_of_two(remaining_size + adjust_size);
>>>     +                       vres->flags |= DRM_BUDDY_TRIM_DISABLE;
>>>     +               }
>>>     +       }
>>>
>>>             mutex_lock(&mgr->lock);
>>>             while (remaining_size) {
>>>     @@ -521,8 +531,12 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>                             min_block_size = mgr->default_page_size;
>>>
>>>                     size = remaining_size;
>>>     -               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
>>>     -                   !(size & (((u64)pages_per_block <<
>>>     PAGE_SHIFT) - 1)))
>>>     +
>>>     +               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +                   bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
>>>     +                       min_block_size = size;
>>>     +               else if ((size >= (u64)pages_per_block <<
>>>     PAGE_SHIFT) &&
>>>     +                        !(size & (((u64)pages_per_block <<
>>>     PAGE_SHIFT) - 1)))
>>>                             min_block_size = (u64)pages_per_block <<
>>>     PAGE_SHIFT;
>>>
>>>                     BUG_ON(min_block_size < mm->chunk_size);
>>>     @@ -553,6 +567,24 @@ static int amdgpu_vram_mgr_new(struct
>>>     ttm_resource_manager *man,
>>>             }
>>>             mutex_unlock(&mgr->lock);
>>>
>>>     +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
>>>     +           bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
>>>     +               struct drm_buddy_block *dcc_block;
>>>     +               u64 dcc_start, alignment;
>>>     +
>>>     +               dcc_block =
>>>     amdgpu_vram_mgr_first_block(&vres->blocks);
>>>     +               dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
>>>     +
>>>     +               if (adev->gmc.gmc_funcs->get_dcc_alignment) {
>>>     +                       alignment =
>>>     amdgpu_gmc_get_dcc_alignment(adev);
>>>     +                       /* Adjust the start address for DCC
>>>     buffers only */
>>>     +                       dcc_start = roundup(dcc_start, alignment);
>>>     +                       drm_buddy_block_trim(mm, &dcc_start,
>>>     + (u64)vres->base.size,
>>>     + &vres->blocks);
>>>     +               }
>>>     +       }
>>>     +
>>>             vres->base.start = 0;
>>>             size = max_t(u64,
>>>     amdgpu_vram_mgr_blocks_size(&vres->blocks),
>>>                          vres->base.size);
>>>     diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     index fd3ac483760e..4259edcdec8a 100644
>>>     --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
>>>     @@ -542,6 +542,20 @@ static unsigned
>>>     gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
>>>             return 0;
>>>      }
>>>
>>>     +static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
>>>     +{
>>>     +       u64 max_tex_channel_caches, alignment;
>>>     +
>>>     +       max_tex_channel_caches =
>>>     adev->gfx.config.max_texture_channel_caches;
>>>     +       if (is_power_of_2(max_tex_channel_caches))
>>>     +               alignment = (max_tex_channel_caches / SZ_4) *
>>>     max_tex_channel_caches;
>>>     +       else
>>>     +               alignment =
>>>     roundup_pow_of_two(max_tex_channel_caches) *
>>>     +  max_tex_channel_caches;
>>>     +
>>>     +       return (u64)alignment * SZ_1K;
>>>     +}
>>>     +
>>>      static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
>>>             .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
>>>             .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
>>>     @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
>>>     gmc_v12_0_gmc_funcs = {
>>>             .get_vm_pde = gmc_v12_0_get_vm_pde,
>>>             .get_vm_pte = gmc_v12_0_get_vm_pte,
>>>             .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
>>>     +       .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
>>>      };
>>>
>>>      static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
>>>     -- 
>>>     2.25.1
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index febca3130497..654d0548a3f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -156,6 +156,8 @@  struct amdgpu_gmc_funcs {
 				      uint64_t addr, uint64_t *flags);
 	/* get the amount of memory used by the vbios for pre-OS console */
 	unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+	/* get the DCC buffer alignment */
+	u64 (*get_dcc_alignment)(struct amdgpu_device *adev);
 
 	enum amdgpu_memory_partition (*query_mem_partition_mode)(
 		struct amdgpu_device *adev);
@@ -363,6 +365,10 @@  struct amdgpu_gmc {
 	(adev)->gmc.gmc_funcs->override_vm_pte_flags			\
 		((adev), (vm), (addr), (pte_flags))
 #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_get_dcc_alignment(_adev) ({			\
+	typeof(_adev) (adev) = (_adev);				\
+	((adev)->gmc.gmc_funcs->get_dcc_alignment((adev)));	\
+})
 
 /**
  * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index f91cc149d06c..aa9dca12371c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -512,6 +512,16 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
 	remaining_size = (u64)vres->base.size;
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+	    bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
+		u64 adjust_size;
+
+		if (adev->gmc.gmc_funcs->get_dcc_alignment) {
+			adjust_size = amdgpu_gmc_get_dcc_alignment(adev);
+			remaining_size = roundup_pow_of_two(remaining_size + adjust_size);
+			vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+		}
+	}
 
 	mutex_lock(&mgr->lock);
 	while (remaining_size) {
@@ -521,8 +531,12 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			min_block_size = mgr->default_page_size;
 
 		size = remaining_size;
-		if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
-		    !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+
+		if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+		    bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+			min_block_size = size;
+		else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+			 !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
 			min_block_size = (u64)pages_per_block << PAGE_SHIFT;
 
 		BUG_ON(min_block_size < mm->chunk_size);
@@ -553,6 +567,24 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	}
 	mutex_unlock(&mgr->lock);
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+	    bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) {
+		struct drm_buddy_block *dcc_block;
+		u64 dcc_start, alignment;
+
+		dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+		dcc_start = amdgpu_vram_mgr_block_start(dcc_block);
+
+		if (adev->gmc.gmc_funcs->get_dcc_alignment) {
+			alignment = amdgpu_gmc_get_dcc_alignment(adev);
+			/* Adjust the start address for DCC buffers only */
+			dcc_start = roundup(dcc_start, alignment);
+			drm_buddy_block_trim(mm, &dcc_start,
+					     (u64)vres->base.size,
+					     &vres->blocks);
+		}
+	}
+
 	vres->base.start = 0;
 	size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
 		     vres->base.size);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index fd3ac483760e..4259edcdec8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -542,6 +542,20 @@  static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
 	return 0;
 }
 
+static u64 gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
+{
+	u64 max_tex_channel_caches, alignment;
+
+	max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;
+	if (is_power_of_2(max_tex_channel_caches))
+		alignment = (max_tex_channel_caches / SZ_4) * max_tex_channel_caches;
+	else
+		alignment = roundup_pow_of_two(max_tex_channel_caches) *
+				max_tex_channel_caches;
+
+	return (u64)alignment * SZ_1K;
+}
+
 static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
 	.flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
@@ -551,6 +565,7 @@  static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
 	.get_vm_pde = gmc_v12_0_get_vm_pde,
 	.get_vm_pte = gmc_v12_0_get_vm_pte,
 	.get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
+	.get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
 };
 
 static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)