
[v11,5/5] drm/amdgpu: add drm buddy support to amdgpu

Message ID 20220127141107.173692-5-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Series [v11,1/5] drm: improve drm_buddy_alloc function

Commit Message

Paneer Selvam, Arunpravin Jan. 27, 2022, 2:11 p.m. UTC
- Remove drm_mm references and replace with drm buddy functionalities
- Add res cursor support for drm buddy

v2(Matthew Auld):
  - replace spinlock with mutex as we call kmem_cache_zalloc
    (..., GFP_KERNEL) in drm_buddy_alloc() function

  - lock drm_buddy_block_trim() function as the mark_free/mark_split
    functions it calls are globally visible

v3(Matthew Auld):
  - remove trim method error handling as we address the failure case
    at drm_buddy_block_trim() function

v4:
  - fix warnings reported by kernel test robot <lkp@intel.com>

v5:
  - fix merge conflict issue

v6:
  - fix warnings reported by kernel test robot <lkp@intel.com>

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/Kconfig                       |   1 +
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
 4 files changed, 231 insertions(+), 133 deletions(-)

Comments

Matthew Auld Jan. 28, 2022, 2:18 p.m. UTC | #1
On Thu, 27 Jan 2022 at 14:11, Arunpravin
<Arunpravin.PaneerSelvam@amd.com> wrote:
>
> - Remove drm_mm references and replace with drm buddy functionalities
> - Add res cursor support for drm buddy
>
> v2(Matthew Auld):
>   - replace spinlock with mutex as we call kmem_cache_zalloc
>     (..., GFP_KERNEL) in drm_buddy_alloc() function
>
>   - lock drm_buddy_block_trim() function as it calls
>     mark_free/mark_split are all globally visible
>
> v3(Matthew Auld):
>   - remove trim method error handling as we address the failure case
>     at drm_buddy_block_trim() function
>
> v4:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> v5:
>   - fix merge conflict issue
>
> v6:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>  drivers/gpu/drm/Kconfig                       |   1 +
>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>  4 files changed, 231 insertions(+), 133 deletions(-)

<snip>

>
> -/**
> - * amdgpu_vram_mgr_virt_start - update virtual start address
> - *
> - * @mem: ttm_resource to update
> - * @node: just allocated node
> - *
> - * Calculate a virtual BO start address to easily check if everything is CPU
> - * accessible.
> - */
> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
> -                                      struct drm_mm_node *node)
> -{
> -       unsigned long start;
> -
> -       start = node->start + node->size;
> -       if (start > mem->num_pages)
> -               start -= mem->num_pages;
> -       else
> -               start = 0;
> -       mem->start = max(mem->start, start);
> -}
> -
>  /**
>   * amdgpu_vram_mgr_new - allocate new ranges
>   *
> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                                const struct ttm_place *place,
>                                struct ttm_resource **res)
>  {
> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
> -       struct ttm_range_mgr_node *node;
> -       struct drm_mm *mm = &mgr->mm;
> -       enum drm_mm_insert_mode mode;
> +       struct amdgpu_vram_mgr_node *node;
> +       struct drm_buddy *mm = &mgr->mm;
> +       struct drm_buddy_block *block;
>         unsigned i;
>         int r;
>
> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                 goto error_sub;
>         }
>
> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>                 pages_per_node = ~0ul;
> -               num_nodes = 1;
> -       } else {
> +       else {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>                 pages_per_node = HPAGE_PMD_NR;
>  #else
> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>  #endif
>                 pages_per_node = max_t(uint32_t, pages_per_node,
>                                        tbo->page_alignment);
> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>         }
>
> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
> -                       GFP_KERNEL | __GFP_ZERO);
> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>         if (!node) {
>                 r = -ENOMEM;
>                 goto error_sub;
> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>
>         ttm_resource_init(tbo, place, &node->base);
>
> -       mode = DRM_MM_INSERT_BEST;
> +       INIT_LIST_HEAD(&node->blocks);
> +
>         if (place->flags & TTM_PL_FLAG_TOPDOWN)
> -               mode = DRM_MM_INSERT_HIGH;
> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
> +
> +       if (place->fpfn || lpfn != man->size)
> +               /* Allocate blocks in desired range */
> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
> +       min_page_size = mgr->default_page_size;
> +       BUG_ON(min_page_size < mm->chunk_size);
>
>         pages_left = node->base.num_pages;
>
> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>         pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>
>         i = 0;
> -       spin_lock(&mgr->lock);
>         while (pages_left) {
> -               uint32_t alignment = tbo->page_alignment;
> -
>                 if (pages >= pages_per_node)
> -                       alignment = pages_per_node;
> -
> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
> -                                               alignment, 0, place->fpfn,
> -                                               lpfn, mode);
> -               if (unlikely(r)) {
> -                       if (pages > pages_per_node) {
> -                               if (is_power_of_2(pages))
> -                                       pages = pages / 2;
> -                               else
> -                                       pages = rounddown_pow_of_two(pages);
> -                               continue;
> -                       }
> -                       goto error_free;
> +                       pages = pages_per_node;
> +
> +               n_pages = pages;
> +
> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +                       n_pages = roundup_pow_of_two(n_pages);
> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
> +
> +                       if (n_pages > lpfn)
> +                               lpfn = n_pages;
>                 }
>
> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
> +               mutex_lock(&mgr->lock);
> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> +                                          (u64)lpfn << PAGE_SHIFT,
> +                                          (u64)n_pages << PAGE_SHIFT,
> +                                          min_page_size,
> +                                          &node->blocks,
> +                                          node->flags);
> +               mutex_unlock(&mgr->lock);
> +               if (unlikely(r))
> +                       goto error_free_blocks;
> +
>                 pages_left -= pages;
>                 ++i;
>
>                 if (pages > pages_left)
>                         pages = pages_left;
>         }
> -       spin_unlock(&mgr->lock);
> +
> +       /* Free unused pages for contiguous allocation */
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
> +
> +               mutex_lock(&mgr->lock);
> +               drm_buddy_block_trim(mm,
> +                                    actual_size,
> +                                    &node->blocks);
> +               mutex_unlock(&mgr->lock);
> +       }
> +
> +       list_for_each_entry(block, &node->blocks, link)
> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
> +
> +       block = list_first_entry_or_null(&node->blocks,
> +                                        struct drm_buddy_block,
> +                                        link);
> +       if (!block) {
> +               r = -ENOENT;
> +               goto error_free_res;
> +       }
> +
> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;

Hmm, does this work? It looks like there are various places checking
that res->start + res->num_pages <= visible_size, which IIUC should
only return true when the entire object is placed in the mappable
portion. i915 is doing something similar. Also it looks like
ttm_resource_compat() is potentially relying on this, like when moving
something from non-mappable -> mappable in
amdgpu_bo_fault_reserve_notify()?

Perhaps something like:

if (vis_usage == num_pages)
    base.start = 0;
else
    base.start = visible_size;

Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
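
For the first option, that would be roughly (just a sketch, comparing in
bytes since vis_usage is counted in bytes):

	if (vis_usage == (u64)node->base.num_pages << PAGE_SHIFT)
		/* everything fits in the CPU visible portion */
		node->base.start = 0;
	else
		/* force start + num_pages <= visible_size checks to fail */
		node->base.start = adev->gmc.visible_vram_size >> PAGE_SHIFT;
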
Paneer Selvam, Arunpravin Feb. 4, 2022, 11:22 a.m. UTC | #2
On 28/01/22 7:48 pm, Matthew Auld wrote:
> On Thu, 27 Jan 2022 at 14:11, Arunpravin
> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>
>> - Remove drm_mm references and replace with drm buddy functionalities
>> - Add res cursor support for drm buddy
>>
>> v2(Matthew Auld):
>>   - replace spinlock with mutex as we call kmem_cache_zalloc
>>     (..., GFP_KERNEL) in drm_buddy_alloc() function
>>
>>   - lock drm_buddy_block_trim() function as it calls
>>     mark_free/mark_split are all globally visible
>>
>> v3(Matthew Auld):
>>   - remove trim method error handling as we address the failure case
>>     at drm_buddy_block_trim() function
>>
>> v4:
>>   - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> v5:
>>   - fix merge conflict issue
>>
>> v6:
>>   - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>  drivers/gpu/drm/Kconfig                       |   1 +
>>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>  4 files changed, 231 insertions(+), 133 deletions(-)
> 
> <snip>
> 
>>
>> -/**
>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>> - *
>> - * @mem: ttm_resource to update
>> - * @node: just allocated node
>> - *
>> - * Calculate a virtual BO start address to easily check if everything is CPU
>> - * accessible.
>> - */
>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>> -                                      struct drm_mm_node *node)
>> -{
>> -       unsigned long start;
>> -
>> -       start = node->start + node->size;
>> -       if (start > mem->num_pages)
>> -               start -= mem->num_pages;
>> -       else
>> -               start = 0;
>> -       mem->start = max(mem->start, start);
>> -}
>> -
>>  /**
>>   * amdgpu_vram_mgr_new - allocate new ranges
>>   *
>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>                                const struct ttm_place *place,
>>                                struct ttm_resource **res)
>>  {
>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> -       struct ttm_range_mgr_node *node;
>> -       struct drm_mm *mm = &mgr->mm;
>> -       enum drm_mm_insert_mode mode;
>> +       struct amdgpu_vram_mgr_node *node;
>> +       struct drm_buddy *mm = &mgr->mm;
>> +       struct drm_buddy_block *block;
>>         unsigned i;
>>         int r;
>>
>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>                 goto error_sub;
>>         }
>>
>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>                 pages_per_node = ~0ul;
>> -               num_nodes = 1;
>> -       } else {
>> +       else {
>>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>                 pages_per_node = HPAGE_PMD_NR;
>>  #else
>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>  #endif
>>                 pages_per_node = max_t(uint32_t, pages_per_node,
>>                                        tbo->page_alignment);
>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>         }
>>
>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>> -                       GFP_KERNEL | __GFP_ZERO);
>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>         if (!node) {
>>                 r = -ENOMEM;
>>                 goto error_sub;
>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>
>>         ttm_resource_init(tbo, place, &node->base);
>>
>> -       mode = DRM_MM_INSERT_BEST;
>> +       INIT_LIST_HEAD(&node->blocks);
>> +
>>         if (place->flags & TTM_PL_FLAG_TOPDOWN)
>> -               mode = DRM_MM_INSERT_HIGH;
>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>> +
>> +       if (place->fpfn || lpfn != man->size)
>> +               /* Allocate blocks in desired range */
>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>> +       min_page_size = mgr->default_page_size;
>> +       BUG_ON(min_page_size < mm->chunk_size);
>>
>>         pages_left = node->base.num_pages;
>>
>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>         pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>
>>         i = 0;
>> -       spin_lock(&mgr->lock);
>>         while (pages_left) {
>> -               uint32_t alignment = tbo->page_alignment;
>> -
>>                 if (pages >= pages_per_node)
>> -                       alignment = pages_per_node;
>> -
>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>> -                                               alignment, 0, place->fpfn,
>> -                                               lpfn, mode);
>> -               if (unlikely(r)) {
>> -                       if (pages > pages_per_node) {
>> -                               if (is_power_of_2(pages))
>> -                                       pages = pages / 2;
>> -                               else
>> -                                       pages = rounddown_pow_of_two(pages);
>> -                               continue;
>> -                       }
>> -                       goto error_free;
>> +                       pages = pages_per_node;
>> +
>> +               n_pages = pages;
>> +
>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +                       n_pages = roundup_pow_of_two(n_pages);
>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>> +
>> +                       if (n_pages > lpfn)
>> +                               lpfn = n_pages;
>>                 }
>>
>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>> +               mutex_lock(&mgr->lock);
>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>> +                                          (u64)lpfn << PAGE_SHIFT,
>> +                                          (u64)n_pages << PAGE_SHIFT,
>> +                                          min_page_size,
>> +                                          &node->blocks,
>> +                                          node->flags);
>> +               mutex_unlock(&mgr->lock);
>> +               if (unlikely(r))
>> +                       goto error_free_blocks;
>> +
>>                 pages_left -= pages;
>>                 ++i;
>>
>>                 if (pages > pages_left)
>>                         pages = pages_left;
>>         }
>> -       spin_unlock(&mgr->lock);
>> +
>> +       /* Free unused pages for contiguous allocation */
>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>> +
>> +               mutex_lock(&mgr->lock);
>> +               drm_buddy_block_trim(mm,
>> +                                    actual_size,
>> +                                    &node->blocks);
>> +               mutex_unlock(&mgr->lock);
>> +       }
>> +
>> +       list_for_each_entry(block, &node->blocks, link)
>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>> +
>> +       block = list_first_entry_or_null(&node->blocks,
>> +                                        struct drm_buddy_block,
>> +                                        link);
>> +       if (!block) {
>> +               r = -ENOENT;
>> +               goto error_free_res;
>> +       }
>> +
>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
> 
> Hmm, does this work? It looks like there are various places checking
> that res->start + res->num_pages <= visible_size, which IIUC should
> only return true when the entire object is placed in the mappable
> portion. i915 is doing something similar. Also it looks like
> ttm_resource_compat() is potentially relying on this, like when moving
> something from non-mappable -> mappable in
> amdgpu_bo_fault_reserve_notify()?
> 
> Perhaps something like:
> 
> if (vis_usage == num_pages)
>     base.start = 0;
> else
>     base.start = visible_size;
> 
> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
> 

hmm, I wonder how it works; maybe we just haven't hit the corner case
where res->start + res->num_pages > visible_size

in amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is set, we
set ttm_place.lpfn = visible_pfn in
amdgpu_bo_placement_from_domain(). Hence, in the amdgpu_vram_mgr_new()
function the DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which makes us
call alloc_range_bias() in drm_buddy.c.

There we get blocks chained together in arbitrary order, all within the
visible_pfn range. Say for instance num_pages = 13,
we may get:
Block 1 addr - 500 (order-3)
Block 2 addr - 400 (order-2)
Block 3 addr - 600 (order-0)

Currently base.start is taken from Block 1, the first entry in the list,
so it gets the address 500. That is fine for resource access, since we
walk the blocks through the list links.

But for the res->start + res->num_pages <= visible_size check in a few
places, this doesn't work. AFAIK keeping amdgpu_vram_mgr_virt_start()
doesn't work either, since that function expects nodes at contiguous
addresses to calculate the start address, and assigning the lowest start
address (400 + num_pages <= visible_size) would be misleading in our
case since the blocks are only linked through a list.

How about replacing the check with a bool-returning function which
checks, for each block, that block start address + block size <=
visible_size?
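
Roughly something like this (just a sketch, the helper name is a
placeholder):

static bool amdgpu_vram_mgr_blocks_cpu_visible(struct amdgpu_device *adev,
					       struct ttm_resource *res)
{
	struct drm_buddy_block *block;

	if (res->mem_type != TTM_PL_VRAM)
		return false;

	/* every block has to end inside the CPU visible window */
	list_for_each_entry(block, &to_amdgpu_vram_mgr_node(res)->blocks, link) {
		if (amdgpu_node_start(block) + amdgpu_node_size(block) >
		    adev->gmc.visible_vram_size)
			return false;
	}

	return true;
}
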
Christian König Feb. 4, 2022, 1:23 p.m. UTC | #3
Am 04.02.22 um 12:22 schrieb Arunpravin:
> On 28/01/22 7:48 pm, Matthew Auld wrote:
>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>> - Remove drm_mm references and replace with drm buddy functionalities
>>> - Add res cursor support for drm buddy
>>>
>>> v2(Matthew Auld):
>>>    - replace spinlock with mutex as we call kmem_cache_zalloc
>>>      (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>
>>>    - lock drm_buddy_block_trim() function as it calls
>>>      mark_free/mark_split are all globally visible
>>>
>>> v3(Matthew Auld):
>>>    - remove trim method error handling as we address the failure case
>>>      at drm_buddy_block_trim() function
>>>
>>> v4:
>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>
>>> v5:
>>>    - fix merge conflict issue
>>>
>>> v6:
>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>
>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>> ---
>>>   drivers/gpu/drm/Kconfig                       |   1 +
>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>   4 files changed, 231 insertions(+), 133 deletions(-)
>> <snip>
>>
>>> -/**
>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>> - *
>>> - * @mem: ttm_resource to update
>>> - * @node: just allocated node
>>> - *
>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>> - * accessible.
>>> - */
>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>> -                                      struct drm_mm_node *node)
>>> -{
>>> -       unsigned long start;
>>> -
>>> -       start = node->start + node->size;
>>> -       if (start > mem->num_pages)
>>> -               start -= mem->num_pages;
>>> -       else
>>> -               start = 0;
>>> -       mem->start = max(mem->start, start);
>>> -}
>>> -
>>>   /**
>>>    * amdgpu_vram_mgr_new - allocate new ranges
>>>    *
>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>                                 const struct ttm_place *place,
>>>                                 struct ttm_resource **res)
>>>   {
>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>> -       struct ttm_range_mgr_node *node;
>>> -       struct drm_mm *mm = &mgr->mm;
>>> -       enum drm_mm_insert_mode mode;
>>> +       struct amdgpu_vram_mgr_node *node;
>>> +       struct drm_buddy *mm = &mgr->mm;
>>> +       struct drm_buddy_block *block;
>>>          unsigned i;
>>>          int r;
>>>
>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>                  goto error_sub;
>>>          }
>>>
>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>                  pages_per_node = ~0ul;
>>> -               num_nodes = 1;
>>> -       } else {
>>> +       else {
>>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>                  pages_per_node = HPAGE_PMD_NR;
>>>   #else
>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>   #endif
>>>                  pages_per_node = max_t(uint32_t, pages_per_node,
>>>                                         tbo->page_alignment);
>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>          }
>>>
>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>> -                       GFP_KERNEL | __GFP_ZERO);
>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>          if (!node) {
>>>                  r = -ENOMEM;
>>>                  goto error_sub;
>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>
>>>          ttm_resource_init(tbo, place, &node->base);
>>>
>>> -       mode = DRM_MM_INSERT_BEST;
>>> +       INIT_LIST_HEAD(&node->blocks);
>>> +
>>>          if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>> -               mode = DRM_MM_INSERT_HIGH;
>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>> +
>>> +       if (place->fpfn || lpfn != man->size)
>>> +               /* Allocate blocks in desired range */
>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>> +
>>> +       min_page_size = mgr->default_page_size;
>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>
>>>          pages_left = node->base.num_pages;
>>>
>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>          pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>
>>>          i = 0;
>>> -       spin_lock(&mgr->lock);
>>>          while (pages_left) {
>>> -               uint32_t alignment = tbo->page_alignment;
>>> -
>>>                  if (pages >= pages_per_node)
>>> -                       alignment = pages_per_node;
>>> -
>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>> -                                               alignment, 0, place->fpfn,
>>> -                                               lpfn, mode);
>>> -               if (unlikely(r)) {
>>> -                       if (pages > pages_per_node) {
>>> -                               if (is_power_of_2(pages))
>>> -                                       pages = pages / 2;
>>> -                               else
>>> -                                       pages = rounddown_pow_of_two(pages);
>>> -                               continue;
>>> -                       }
>>> -                       goto error_free;
>>> +                       pages = pages_per_node;
>>> +
>>> +               n_pages = pages;
>>> +
>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>> +
>>> +                       if (n_pages > lpfn)
>>> +                               lpfn = n_pages;
>>>                  }
>>>
>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>> +               mutex_lock(&mgr->lock);
>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>> +                                          min_page_size,
>>> +                                          &node->blocks,
>>> +                                          node->flags);
>>> +               mutex_unlock(&mgr->lock);
>>> +               if (unlikely(r))
>>> +                       goto error_free_blocks;
>>> +
>>>                  pages_left -= pages;
>>>                  ++i;
>>>
>>>                  if (pages > pages_left)
>>>                          pages = pages_left;
>>>          }
>>> -       spin_unlock(&mgr->lock);
>>> +
>>> +       /* Free unused pages for contiguous allocation */
>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>> +
>>> +               mutex_lock(&mgr->lock);
>>> +               drm_buddy_block_trim(mm,
>>> +                                    actual_size,
>>> +                                    &node->blocks);
>>> +               mutex_unlock(&mgr->lock);
>>> +       }
>>> +
>>> +       list_for_each_entry(block, &node->blocks, link)
>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>> +
>>> +       block = list_first_entry_or_null(&node->blocks,
>>> +                                        struct drm_buddy_block,
>>> +                                        link);
>>> +       if (!block) {
>>> +               r = -ENOENT;
>>> +               goto error_free_res;
>>> +       }
>>> +
>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>> Hmm, does this work? It looks like there are various places checking
>> that res->start + res->num_pages <= visible_size, which IIUC should
>> only return true when the entire object is placed in the mappable
>> portion. i915 is doing something similar. Also it looks like
>> ttm_resource_compat() is potentially relying on this, like when moving
>> something from non-mappable -> mappable in
>> amdgpu_bo_fault_reserve_notify()?
>>
>> Perhaps something like:
>>
>> if (vis_usage == num_pages)
>>      base.start = 0;
>> else
>>      base.start = visible_size;
>>
>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>
> hmm, I wonder how it works, may be we didn't go through the corner case
> where res->start + res->num_pages > visible_size
>
> in amdgpu if AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is enabled, we
> set the ttm_place.lpfn = visible_pfn at
> amdgpu_bo_placement_from_domain(). Hence, in amdgpu_vram_mgr_new()
> function DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls the
> alloc_range_bias() in drm_buddy.c.
>
> Here we get blocks chained together in random order complying
> visible_pfn range. say for instance num_pages = 13
> we may get,
> Block 1 addr - 500 (order-3)
> Block 2 addr - 400 (order-2)
> Block 3 addr - 600 (order-0)
>
> I think currently base.start = Block 1 start address fetched from the
> list and the address 500 assigned to it, which is good for the resource
> access since we access the blocks using the list link
>
> But for the check res->start + res->num_pages <= visible_size in few
> places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
> doesn't work since the function looks for nodes in continuous address to
> calculate the start address. AFAIK, assigning the start address (400 +
> num_pages <= visible_size) mislead in our case since we use linked list
>
> how about replacing the check with a bool type return function which
> checks the each block start address + block size <= visible_size?

Yeah, we already have that in the TTM code. It's just not used 
everywhere IIRC.

The node->start can just be set to the invalid offset for now and should 
be removed as soon as we don't need it any more.
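
E.g. just something like this as a stop gap (sketch only, any obviously
out of range value would do; AMDGPU_BO_INVALID_OFFSET here is only an
example):

	node->base.start = AMDGPU_BO_INVALID_OFFSET;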

Regards,
Christian.
Paneer Selvam, Arunpravin Feb. 8, 2022, 11:20 a.m. UTC | #4
On 04/02/22 6:53 pm, Christian König wrote:
> Am 04.02.22 um 12:22 schrieb Arunpravin:
>> On 28/01/22 7:48 pm, Matthew Auld wrote:
>>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>> - Remove drm_mm references and replace with drm buddy functionalities
>>>> - Add res cursor support for drm buddy
>>>>
>>>> v2(Matthew Auld):
>>>>    - replace spinlock with mutex as we call kmem_cache_zalloc
>>>>      (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>>
>>>>    - lock drm_buddy_block_trim() function as it calls
>>>>      mark_free/mark_split are all globally visible
>>>>
>>>> v3(Matthew Auld):
>>>>    - remove trim method error handling as we address the failure case
>>>>      at drm_buddy_block_trim() function
>>>>
>>>> v4:
>>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>
>>>> v5:
>>>>    - fix merge conflict issue
>>>>
>>>> v6:
>>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>
>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/Kconfig                       |   1 +
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>>   4 files changed, 231 insertions(+), 133 deletions(-)
>>> <snip>
>>>
>>>> -/**
>>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>>> - *
>>>> - * @mem: ttm_resource to update
>>>> - * @node: just allocated node
>>>> - *
>>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>>> - * accessible.
>>>> - */
>>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>> -                                      struct drm_mm_node *node)
>>>> -{
>>>> -       unsigned long start;
>>>> -
>>>> -       start = node->start + node->size;
>>>> -       if (start > mem->num_pages)
>>>> -               start -= mem->num_pages;
>>>> -       else
>>>> -               start = 0;
>>>> -       mem->start = max(mem->start, start);
>>>> -}
>>>> -
>>>>   /**
>>>>    * amdgpu_vram_mgr_new - allocate new ranges
>>>>    *
>>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>                                 const struct ttm_place *place,
>>>>                                 struct ttm_resource **res)
>>>>   {
>>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>> -       struct ttm_range_mgr_node *node;
>>>> -       struct drm_mm *mm = &mgr->mm;
>>>> -       enum drm_mm_insert_mode mode;
>>>> +       struct amdgpu_vram_mgr_node *node;
>>>> +       struct drm_buddy *mm = &mgr->mm;
>>>> +       struct drm_buddy_block *block;
>>>>          unsigned i;
>>>>          int r;
>>>>
>>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>                  goto error_sub;
>>>>          }
>>>>
>>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>                  pages_per_node = ~0ul;
>>>> -               num_nodes = 1;
>>>> -       } else {
>>>> +       else {
>>>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>>                  pages_per_node = HPAGE_PMD_NR;
>>>>   #else
>>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>   #endif
>>>>                  pages_per_node = max_t(uint32_t, pages_per_node,
>>>>                                         tbo->page_alignment);
>>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>>          }
>>>>
>>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>>> -                       GFP_KERNEL | __GFP_ZERO);
>>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>>          if (!node) {
>>>>                  r = -ENOMEM;
>>>>                  goto error_sub;
>>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>
>>>>          ttm_resource_init(tbo, place, &node->base);
>>>>
>>>> -       mode = DRM_MM_INSERT_BEST;
>>>> +       INIT_LIST_HEAD(&node->blocks);
>>>> +
>>>>          if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>>> -               mode = DRM_MM_INSERT_HIGH;
>>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>>> +
>>>> +       if (place->fpfn || lpfn != man->size)
>>>> +               /* Allocate blocks in desired range */
>>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>> +
>>>> +       min_page_size = mgr->default_page_size;
>>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>>
>>>>          pages_left = node->base.num_pages;
>>>>
>>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>          pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>>
>>>>          i = 0;
>>>> -       spin_lock(&mgr->lock);
>>>>          while (pages_left) {
>>>> -               uint32_t alignment = tbo->page_alignment;
>>>> -
>>>>                  if (pages >= pages_per_node)
>>>> -                       alignment = pages_per_node;
>>>> -
>>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>>> -                                               alignment, 0, place->fpfn,
>>>> -                                               lpfn, mode);
>>>> -               if (unlikely(r)) {
>>>> -                       if (pages > pages_per_node) {
>>>> -                               if (is_power_of_2(pages))
>>>> -                                       pages = pages / 2;
>>>> -                               else
>>>> -                                       pages = rounddown_pow_of_two(pages);
>>>> -                               continue;
>>>> -                       }
>>>> -                       goto error_free;
>>>> +                       pages = pages_per_node;
>>>> +
>>>> +               n_pages = pages;
>>>> +
>>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>>> +
>>>> +                       if (n_pages > lpfn)
>>>> +                               lpfn = n_pages;
>>>>                  }
>>>>
>>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>>> +               mutex_lock(&mgr->lock);
>>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>>> +                                          min_page_size,
>>>> +                                          &node->blocks,
>>>> +                                          node->flags);
>>>> +               mutex_unlock(&mgr->lock);
>>>> +               if (unlikely(r))
>>>> +                       goto error_free_blocks;
>>>> +
>>>>                  pages_left -= pages;
>>>>                  ++i;
>>>>
>>>>                  if (pages > pages_left)
>>>>                          pages = pages_left;
>>>>          }
>>>> -       spin_unlock(&mgr->lock);
>>>> +
>>>> +       /* Free unused pages for contiguous allocation */
>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>>> +
>>>> +               mutex_lock(&mgr->lock);
>>>> +               drm_buddy_block_trim(mm,
>>>> +                                    actual_size,
>>>> +                                    &node->blocks);
>>>> +               mutex_unlock(&mgr->lock);
>>>> +       }
>>>> +
>>>> +       list_for_each_entry(block, &node->blocks, link)
>>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>>> +
>>>> +       block = list_first_entry_or_null(&node->blocks,
>>>> +                                        struct drm_buddy_block,
>>>> +                                        link);
>>>> +       if (!block) {
>>>> +               r = -ENOENT;
>>>> +               goto error_free_res;
>>>> +       }
>>>> +
>>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>>> Hmm, does this work? It looks like there are various places checking
>>> that res->start + res->num_pages <= visible_size, which IIUC should
>>> only return true when the entire object is placed in the mappable
>>> portion. i915 is doing something similar. Also it looks like
>>> ttm_resource_compat() is potentially relying on this, like when moving
>>> something from non-mappable -> mappable in
>>> amdgpu_bo_fault_reserve_notify()?
>>>
>>> Perhaps something like:
>>>
>>> if (vis_usage == num_pages)
>>>      base.start = 0;
>>> else
>>>      base.start = visible_size;
>>>
>>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>>
>> hmm, I wonder how it works, may be we didn't go through the corner case
>> where res->start + res->num_pages > visible_size
>>
>> in amdgpu if AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is enabled, we
>> set the ttm_place.lpfn = visible_pfn at
>> amdgpu_bo_placement_from_domain(). Hence, in amdgpu_vram_mgr_new()
>> function DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls the
>> alloc_range_bias() in drm_buddy.c.
>>
>> Here we get blocks chained together in random order complying
>> visible_pfn range. say for instance num_pages = 13
>> we may get,
>> Block 1 addr - 500 (order-3)
>> Block 2 addr - 400 (order-2)
>> Block 3 addr - 600 (order-0)
>>
>> I think currently base.start = Block 1 start address fetched from the
>> list and the address 500 assigned to it, which is good for the resource
>> access since we access the blocks using the list link
>>
>> But for the check res->start + res->num_pages <= visible_size in few
>> places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
>> doesn't work since the function looks for nodes in continuous address to
>> calculate the start address. AFAIK, assigning the start address (400 +
>> num_pages <= visible_size) mislead in our case since we use linked list
>>
>> how about replacing the check with a bool type return function which
>> checks the each block start address + block size <= visible_size?
> 
> Yeah, we already have that in the TTM code. It's just not used 
> everywhere IIRC.

Hi Christian,
here we have a problem: in many places in ttm and amdgpu we use the
tbo->resource->start + bo->resource->num_pages computation, and this
doesn't work with drm buddy since it allocates blocks at different
locations which are only chained together through a linked list.
> 
> The node->start can just be set to the invalid offset for now and should 
> be removed as soon as we don't need it any more.
Assigning the first block's offset to resource->start doesn't work, and
if we set node->start to the invalid offset, don't we just end up with
an incorrect value?
> 
> Regards,
> Christian.
Matthew Auld Feb. 10, 2022, 5:52 p.m. UTC | #5
On 08/02/2022 11:20, Arunpravin wrote:
> 
> 
> On 04/02/22 6:53 pm, Christian König wrote:
>> Am 04.02.22 um 12:22 schrieb Arunpravin:
>>> On 28/01/22 7:48 pm, Matthew Auld wrote:
>>>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>>> - Remove drm_mm references and replace with drm buddy functionalities
>>>>> - Add res cursor support for drm buddy
>>>>>
>>>>> v2(Matthew Auld):
>>>>>     - replace spinlock with mutex as we call kmem_cache_zalloc
>>>>>       (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>>>
>>>>>     - lock drm_buddy_block_trim() function as it calls
>>>>>       mark_free/mark_split are all globally visible
>>>>>
>>>>> v3(Matthew Auld):
>>>>>     - remove trim method error handling as we address the failure case
>>>>>       at drm_buddy_block_trim() function
>>>>>
>>>>> v4:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> v5:
>>>>>     - fix merge conflict issue
>>>>>
>>>>> v6:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/Kconfig                       |   1 +
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>>>    4 files changed, 231 insertions(+), 133 deletions(-)
>>>> <snip>
>>>>
>>>>> -/**
>>>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>>>> - *
>>>>> - * @mem: ttm_resource to update
>>>>> - * @node: just allocated node
>>>>> - *
>>>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>>>> - * accessible.
>>>>> - */
>>>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>>> -                                      struct drm_mm_node *node)
>>>>> -{
>>>>> -       unsigned long start;
>>>>> -
>>>>> -       start = node->start + node->size;
>>>>> -       if (start > mem->num_pages)
>>>>> -               start -= mem->num_pages;
>>>>> -       else
>>>>> -               start = 0;
>>>>> -       mem->start = max(mem->start, start);
>>>>> -}
>>>>> -
>>>>>    /**
>>>>>     * amdgpu_vram_mgr_new - allocate new ranges
>>>>>     *
>>>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                                  const struct ttm_place *place,
>>>>>                                  struct ttm_resource **res)
>>>>>    {
>>>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>>>           struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>>           struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>>> -       struct ttm_range_mgr_node *node;
>>>>> -       struct drm_mm *mm = &mgr->mm;
>>>>> -       enum drm_mm_insert_mode mode;
>>>>> +       struct amdgpu_vram_mgr_node *node;
>>>>> +       struct drm_buddy *mm = &mgr->mm;
>>>>> +       struct drm_buddy_block *block;
>>>>>           unsigned i;
>>>>>           int r;
>>>>>
>>>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                   goto error_sub;
>>>>>           }
>>>>>
>>>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>                   pages_per_node = ~0ul;
>>>>> -               num_nodes = 1;
>>>>> -       } else {
>>>>> +       else {
>>>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>>>                   pages_per_node = HPAGE_PMD_NR;
>>>>>    #else
>>>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>    #endif
>>>>>                   pages_per_node = max_t(uint32_t, pages_per_node,
>>>>>                                          tbo->page_alignment);
>>>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>>>           }
>>>>>
>>>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>>>> -                       GFP_KERNEL | __GFP_ZERO);
>>>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>>>           if (!node) {
>>>>>                   r = -ENOMEM;
>>>>>                   goto error_sub;
>>>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>
>>>>>           ttm_resource_init(tbo, place, &node->base);
>>>>>
>>>>> -       mode = DRM_MM_INSERT_BEST;
>>>>> +       INIT_LIST_HEAD(&node->blocks);
>>>>> +
>>>>>           if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>>>> -               mode = DRM_MM_INSERT_HIGH;
>>>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>>>> +
>>>>> +       if (place->fpfn || lpfn != man->size)
>>>>> +               /* Allocate blocks in desired range */
>>>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>>> +
>>>>> +       min_page_size = mgr->default_page_size;
>>>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>>>
>>>>>           pages_left = node->base.num_pages;
>>>>>
>>>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>           pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>>>
>>>>>           i = 0;
>>>>> -       spin_lock(&mgr->lock);
>>>>>           while (pages_left) {
>>>>> -               uint32_t alignment = tbo->page_alignment;
>>>>> -
>>>>>                   if (pages >= pages_per_node)
>>>>> -                       alignment = pages_per_node;
>>>>> -
>>>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>>>> -                                               alignment, 0, place->fpfn,
>>>>> -                                               lpfn, mode);
>>>>> -               if (unlikely(r)) {
>>>>> -                       if (pages > pages_per_node) {
>>>>> -                               if (is_power_of_2(pages))
>>>>> -                                       pages = pages / 2;
>>>>> -                               else
>>>>> -                                       pages = rounddown_pow_of_two(pages);
>>>>> -                               continue;
>>>>> -                       }
>>>>> -                       goto error_free;
>>>>> +                       pages = pages_per_node;
>>>>> +
>>>>> +               n_pages = pages;
>>>>> +
>>>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>>>> +
>>>>> +                       if (n_pages > lpfn)
>>>>> +                               lpfn = n_pages;
>>>>>                   }
>>>>>
>>>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>>>> +                                          min_page_size,
>>>>> +                                          &node->blocks,
>>>>> +                                          node->flags);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +               if (unlikely(r))
>>>>> +                       goto error_free_blocks;
>>>>> +
>>>>>                   pages_left -= pages;
>>>>>                   ++i;
>>>>>
>>>>>                   if (pages > pages_left)
>>>>>                           pages = pages_left;
>>>>>           }
>>>>> -       spin_unlock(&mgr->lock);
>>>>> +
>>>>> +       /* Free unused pages for contiguous allocation */
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>>>> +
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               drm_buddy_block_trim(mm,
>>>>> +                                    actual_size,
>>>>> +                                    &node->blocks);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +       }
>>>>> +
>>>>> +       list_for_each_entry(block, &node->blocks, link)
>>>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>>>> +
>>>>> +       block = list_first_entry_or_null(&node->blocks,
>>>>> +                                        struct drm_buddy_block,
>>>>> +                                        link);
>>>>> +       if (!block) {
>>>>> +               r = -ENOENT;
>>>>> +               goto error_free_res;
>>>>> +       }
>>>>> +
>>>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>>>> Hmm, does this work? It looks like there are various places checking
>>>> that res->start + res->num_pages <= visible_size, which IIUC should
>>>> only return true when the entire object is placed in the mappable
>>>> portion. i915 is doing something similar. Also it looks like
>>>> ttm_resource_compat() is potentially relying on this, like when moving
>>>> something from non-mappable -> mappable in
>>>> amdgpu_bo_fault_reserve_notify()?
>>>>
>>>> Perhaps something like:
>>>>
>>>> if (vis_usage == num_pages)
>>>>       base.start = 0;
>>>> else
>>>>       base.start = visible_size;
>>>>
>>>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>>>
>>> hmm, I wonder how it works, may be we didn't go through the corner case
>>> where res->start + res->num_pages > visible_size
>>>
>>> in amdgpu if AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is enabled, we
>>> set the ttm_place.lpfn = visible_pfn at
>>> amdgpu_bo_placement_from_domain(). Hence, in amdgpu_vram_mgr_new()
>>> function DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls the
>>> alloc_range_bias() in drm_buddy.c.
>>>
>>> Here we get blocks chained together in random order complying
>>> visible_pfn range. say for instance num_pages = 13
>>> we may get,
>>> Block 1 addr - 500 (order-3)
>>> Block 2 addr - 400 (order-2)
>>> Block 3 addr - 600 (order-0)
>>>
>>> I think currently base.start = Block 1 start address fetched from the
>>> list and the address 500 assigned to it, which is good for the resource
>>> access since we access the blocks using the list link
>>>
>>> But for the check res->start + res->num_pages <= visible_size in few
>>> places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
>>> doesn't work since the function looks for nodes in continuous address to
>>> calculate the start address. AFAIK, assigning the start address (400 +
>>> num_pages <= visible_size) mislead in our case since we use linked list
>>>
>>> how about replacing the check with a bool type return function which
>>> checks the each block start address + block size <= visible_size?
>>
>> Yeah, we already have that in the TTM code. It's just not used
>> everywhere IIRC.
> 
> Hi Christian,
> here we have a problem, many places in ttm and amdgpu, we are using the
> tbo->resource->start + bo->resource->num_pages operation, this doesn't
> work in case of drm buddy since it allocates blocks in different
> locations which are chained together using linked list.

AFAICT that was already the case with the existing code, since you can
get an array of discontiguous drm_mm blocks. amdgpu_vram_mgr_virt_start()
looks to be handling that by creating a fake/virtual res->start offset,
which is effectively the maximum end pfn over the range of drm_mm blocks
(whilst also accounting for num_pages), and yes, if there are multiple
blocks then res->start is more of a "virtual" offset. AFAIK that scheme
should also work as-is with the buddy.
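
I.e. roughly this (just a sketch), keeping the exact same scheme but
feeding it the buddy blocks instead of drm_mm nodes, and calling it for
every allocated block in the amdgpu_vram_mgr_new() loop like before:

static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
				       struct drm_buddy_block *block)
{
	u64 start = amdgpu_node_start(block) + amdgpu_node_size(block);

	start >>= PAGE_SHIFT;
	if (start > mem->num_pages)
		start -= mem->num_pages;
	else
		start = 0;
	mem->start = max_t(unsigned long, mem->start, start);
}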

>>
>> The node->start can just be set to the invalid offset for now and should
>> be removed as soon as we don't need it any more.
> Assigning the start block offset to resource->start doesn't work,
> If we set node->start to invalid offset, we get an incorrect value?
>>
>> Regards,
>> Christian.

Patch

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dfdd3ec5f793..eb5a57ae3c5c 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -279,6 +279,7 @@  config DRM_AMDGPU
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
 	select INTERVAL_TREE
+	select DRM_BUDDY
 	help
 	  Choose this option if you have a recent AMD Radeon graphics card.
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index acfa207cf970..da12b4ff2e45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@ 
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_range_manager.h>
 
+#include "amdgpu_vram_mgr.h"
+
 /* state back for walking over vram_mgr and gtt_mgr allocations */
 struct amdgpu_res_cursor {
 	uint64_t		start;
 	uint64_t		size;
 	uint64_t		remaining;
-	struct drm_mm_node	*node;
+	void			*node;
+	uint32_t		mem_type;
 };
 
 /**
@@ -52,27 +55,63 @@  static inline void amdgpu_res_first(struct ttm_resource *res,
 				    uint64_t start, uint64_t size,
 				    struct amdgpu_res_cursor *cur)
 {
+	struct drm_buddy_block *block;
+	struct list_head *head, *next;
 	struct drm_mm_node *node;
 
-	if (!res || res->mem_type == TTM_PL_SYSTEM) {
-		cur->start = start;
-		cur->size = size;
-		cur->remaining = size;
-		cur->node = NULL;
-		WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
-		return;
-	}
+	if (!res)
+		goto err_out;
 
 	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
 
-	node = to_ttm_range_mgr_node(res)->mm_nodes;
-	while (start >= node->size << PAGE_SHIFT)
-		start -= node++->size << PAGE_SHIFT;
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		head = &to_amdgpu_vram_mgr_node(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto err_out;
+
+		while (start >= amdgpu_node_size(block)) {
+			start -= amdgpu_node_size(block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = amdgpu_node_start(block) + start;
+		cur->size = min(amdgpu_node_size(block) - start, size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	case TTM_PL_TT:
+		node = to_ttm_range_mgr_node(res)->mm_nodes;
+		while (start >= node->size << PAGE_SHIFT)
+			start -= node++->size << PAGE_SHIFT;
+
+		cur->start = (node->start << PAGE_SHIFT) + start;
+		cur->size = min((node->size << PAGE_SHIFT) - start, size);
+		cur->remaining = size;
+		cur->node = node;
+		break;
+	default:
+		goto err_out;
+	}
 
-	cur->start = (node->start << PAGE_SHIFT) + start;
-	cur->size = min((node->size << PAGE_SHIFT) - start, size);
+	return;
+
+err_out:
+	cur->start = start;
+	cur->size = size;
 	cur->remaining = size;
-	cur->node = node;
+	cur->node = NULL;
+	WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
+	return;
 }
 
 /**
@@ -85,7 +124,9 @@  static inline void amdgpu_res_first(struct ttm_resource *res,
  */
 static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 {
-	struct drm_mm_node *node = cur->node;
+	struct drm_buddy_block *block;
+	struct drm_mm_node *node;
+	struct list_head *next;
 
 	BUG_ON(size > cur->remaining);
 
@@ -99,9 +140,27 @@  static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 		return;
 	}
 
-	cur->node = ++node;
-	cur->start = node->start << PAGE_SHIFT;
-	cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+		cur->node = block;
+		cur->start = amdgpu_node_start(block);
+		cur->size = min(amdgpu_node_size(block), cur->remaining);
+		break;
+	case TTM_PL_TT:
+		node = cur->node;
+
+		cur->node = ++node;
+		cur->start = node->start << PAGE_SHIFT;
+		cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+		break;
+	default:
+		return;
+	}
 }
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f8f48be16d80..1a5cbd5ef738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,6 +26,7 @@ 
 
 #include <linux/dma-direction.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_buddy.h>
 #include "amdgpu.h"
 
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
@@ -40,12 +41,14 @@ 
 
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
-	struct drm_mm mm;
-	spinlock_t lock;
+	struct drm_buddy mm;
+	/* protects access to buffer objects */
+	struct mutex lock;
 	struct list_head reservations_pending;
 	struct list_head reserved_pages;
 	atomic64_t usage;
 	atomic64_t vis_usage;
+	u64 default_page_size;
 };
 
 struct amdgpu_gtt_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7442095f089c..64ce442c9119 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -32,8 +32,11 @@ 
 #include "atom.h"
 
 struct amdgpu_vram_reservation {
+	u64 start;
+	u64 size;
+	unsigned long flags;
+	struct list_head block;
 	struct list_head node;
-	struct drm_mm_node mm_node;
 };
 
 static inline struct amdgpu_vram_mgr *
@@ -194,10 +197,10 @@  const struct attribute_group amdgpu_vram_mgr_attr_group = {
  * Calculate how many bytes of the MM node are inside visible VRAM
  */
 static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
-				    struct drm_mm_node *node)
+				    struct drm_buddy_block *block)
 {
-	uint64_t start = node->start << PAGE_SHIFT;
-	uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+	u64 start = amdgpu_node_start(block);
+	u64 end = start + amdgpu_node_size(block);
 
 	if (start >= adev->gmc.visible_vram_size)
 		return 0;
@@ -218,9 +221,9 @@  u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_resource *res = bo->tbo.resource;
-	unsigned pages = res->num_pages;
-	struct drm_mm_node *mm;
-	u64 usage;
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
+	struct drm_buddy_block *block;
+	u64 usage = 0;
 
 	if (amdgpu_gmc_vram_full_visible(&adev->gmc))
 		return amdgpu_bo_size(bo);
@@ -228,9 +231,8 @@  u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		return 0;
 
-	mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
-	for (usage = 0; pages; pages -= mm->size, mm++)
-		usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	list_for_each_entry(block, &node->blocks, link)
+		usage += amdgpu_vram_mgr_vis_size(adev, block);
 
 	return usage;
 }
@@ -240,21 +242,29 @@  static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct drm_mm *mm = &mgr->mm;
+	struct drm_buddy *mm = &mgr->mm;
 	struct amdgpu_vram_reservation *rsv, *temp;
+	struct drm_buddy_block *block;
 	uint64_t vis_usage;
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
-		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+		if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+					   rsv->size, mm->chunk_size, &rsv->block,
+					   rsv->flags))
 			continue;
 
-		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
-			rsv->mm_node.start, rsv->mm_node.size);
-
-		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
-		atomic64_add(vis_usage, &mgr->vis_usage);
-		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
-		list_move(&rsv->node, &mgr->reserved_pages);
+		block = list_first_entry_or_null(&rsv->block,
+						 struct drm_buddy_block,
+						 link);
+		if (block) {
+			dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+				rsv->start, rsv->size);
+
+			vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+			atomic64_add(vis_usage, &mgr->vis_usage);
+			atomic64_add(rsv->size, &mgr->usage);
+			list_move(&rsv->node, &mgr->reserved_pages);
+		}
 	}
 }
 
@@ -277,13 +287,16 @@  int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&rsv->node);
-	rsv->mm_node.start = start >> PAGE_SHIFT;
-	rsv->mm_node.size = size >> PAGE_SHIFT;
+	INIT_LIST_HEAD(&rsv->block);
+
+	rsv->start = start;
+	rsv->size = size;
+	rsv->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
-	spin_lock(&mgr->lock);
-	list_add_tail(&mgr->reservations_pending, &rsv->node);
+	mutex_lock(&mgr->lock);
+	list_add_tail(&rsv->node, &mgr->reservations_pending);
 	amdgpu_vram_mgr_do_reserve(&mgr->manager);
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 
 	return 0;
 }
@@ -305,19 +318,19 @@  int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 	struct amdgpu_vram_reservation *rsv;
 	int ret;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 
 	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = -EBUSY;
 			goto out;
 		}
 	}
 
 	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = 0;
 			goto out;
 		}
@@ -325,32 +338,10 @@  int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 
 	ret = -ENOENT;
 out:
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 	return ret;
 }
 
-/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
-				       struct drm_mm_node *node)
-{
-	unsigned long start;
-
-	start = node->start + node->size;
-	if (start > mem->num_pages)
-		start -= mem->num_pages;
-	else
-		start = 0;
-	mem->start = max(mem->start, start);
-}
-
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -366,13 +357,13 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_resource **res)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+	unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
+	u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	uint64_t vis_usage = 0, mem_bytes, max_bytes;
-	struct ttm_range_mgr_node *node;
-	struct drm_mm *mm = &mgr->mm;
-	enum drm_mm_insert_mode mode;
+	struct amdgpu_vram_mgr_node *node;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	unsigned i;
 	int r;
 
@@ -391,10 +382,9 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		goto error_sub;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
+	else {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		pages_per_node = HPAGE_PMD_NR;
 #else
@@ -403,11 +393,9 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 #endif
 		pages_per_node = max_t(uint32_t, pages_per_node,
 				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
 	}
 
-	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
-			GFP_KERNEL | __GFP_ZERO);
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node) {
 		r = -ENOMEM;
 		goto error_sub;
@@ -415,9 +403,17 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 
 	ttm_resource_init(tbo, place, &node->base);
 
-	mode = DRM_MM_INSERT_BEST;
+	INIT_LIST_HEAD(&node->blocks);
+
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		mode = DRM_MM_INSERT_HIGH;
+		node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (place->fpfn || lpfn != man->size)
+		/* Allocate blocks in desired range */
+		node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	min_page_size = mgr->default_page_size;
+	BUG_ON(min_page_size < mm->chunk_size);
 
 	pages_left = node->base.num_pages;
 
@@ -425,36 +421,61 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
 
 	i = 0;
-	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = tbo->page_alignment;
-
 		if (pages >= pages_per_node)
-			alignment = pages_per_node;
-
-		r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
-						alignment, 0, place->fpfn,
-						lpfn, mode);
-		if (unlikely(r)) {
-			if (pages > pages_per_node) {
-				if (is_power_of_2(pages))
-					pages = pages / 2;
-				else
-					pages = rounddown_pow_of_two(pages);
-				continue;
-			}
-			goto error_free;
+			pages = pages_per_node;
+
+		n_pages = pages;
+
+		if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+			n_pages = roundup_pow_of_two(n_pages);
+			min_page_size = (u64)n_pages << PAGE_SHIFT;
+
+			if (n_pages > lpfn)
+				lpfn = n_pages;
 		}
 
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
-		amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
+		mutex_lock(&mgr->lock);
+		r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					   (u64)lpfn << PAGE_SHIFT,
+					   (u64)n_pages << PAGE_SHIFT,
+					   min_page_size,
+					   &node->blocks,
+					   node->flags);
+		mutex_unlock(&mgr->lock);
+		if (unlikely(r))
+			goto error_free_blocks;
+
 		pages_left -= pages;
 		++i;
 
 		if (pages > pages_left)
 			pages = pages_left;
 	}
-	spin_unlock(&mgr->lock);
+
+	/* Free unused pages for contiguous allocation */
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm,
+				     actual_size,
+				     &node->blocks);
+		mutex_unlock(&mgr->lock);
+	}
+
+	list_for_each_entry(block, &node->blocks, link)
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+	block = list_first_entry_or_null(&node->blocks,
+					 struct drm_buddy_block,
+					 link);
+	if (!block) {
+		r = -ENOENT;
+		goto error_free_res;
+	}
+
+	node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
 
 	if (i == 1)
 		node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
@@ -468,13 +489,13 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	*res = &node->base;
 	return 0;
 
-error_free:
-	while (i--)
-		drm_mm_remove_node(&node->mm_nodes[i]);
-	spin_unlock(&mgr->lock);
+error_free_blocks:
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
+error_free_res:
 	ttm_resource_fini(man, &node->base);
-	kvfree(node);
-
+	kfree(node);
 error_sub:
 	atomic64_sub(mem_bytes, &mgr->usage);
 	return r;
@@ -491,29 +512,29 @@  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 				struct ttm_resource *res)
 {
-	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	uint64_t usage = 0, vis_usage = 0;
-	unsigned i, pages;
 
-	spin_lock(&mgr->lock);
-	for (i = 0, pages = res->num_pages; pages;
-	     pages -= node->mm_nodes[i].size, ++i) {
-		struct drm_mm_node *mm = &node->mm_nodes[i];
-
-		drm_mm_remove_node(mm);
-		usage += mm->size << PAGE_SHIFT;
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	mutex_lock(&mgr->lock);
+	list_for_each_entry(block, &node->blocks, link) {
+		usage += amdgpu_node_size(block);
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
 	}
+
 	amdgpu_vram_mgr_do_reserve(man);
-	spin_unlock(&mgr->lock);
+
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
 	ttm_resource_fini(man, res);
-	kvfree(node);
+	kfree(node);
 }
 
 /**
@@ -663,10 +684,19 @@  static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	mutex_lock(&mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   mgr->default_page_size >> 10);
 
-	spin_lock(&mgr->lock);
-	drm_mm_print(&mgr->mm, printer);
-	spin_unlock(&mgr->lock);
+	drm_buddy_print(mm, printer);
+
+	drm_printf(printer, "reserved:\n");
+	list_for_each_entry(block, &mgr->reserved_pages, link)
+		drm_buddy_block_print(mm, block, printer);
+	mutex_unlock(&mgr->lock);
 
 	drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 		   man->size, amdgpu_vram_mgr_usage(mgr) >> 20,
@@ -690,16 +720,21 @@  int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
 
 	ttm_resource_manager_init(man, &adev->mman.bdev,
 				  adev->gmc.real_vram_size >> PAGE_SHIFT);
 
 	man->func = &amdgpu_vram_mgr_func;
 
-	drm_mm_init(&mgr->mm, 0, man->size);
-	spin_lock_init(&mgr->lock);
+	err = drm_buddy_init(&mgr->mm, man->size << PAGE_SHIFT, PAGE_SIZE);
+	if (err)
+		return err;
+
+	mutex_init(&mgr->lock);
 	INIT_LIST_HEAD(&mgr->reservations_pending);
 	INIT_LIST_HEAD(&mgr->reserved_pages);
+	mgr->default_page_size = PAGE_SIZE;
 
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
 	ttm_resource_manager_set_used(man, true);
@@ -727,16 +762,16 @@  void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	if (ret)
 		return;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
-		drm_mm_remove_node(&rsv->mm_node);
+		drm_buddy_free_list(&mgr->mm, &rsv->block);
 		kfree(rsv);
 	}
-	drm_mm_takedown(&mgr->mm);
-	spin_unlock(&mgr->lock);
+	drm_buddy_fini(&mgr->mm);
+	mutex_unlock(&mgr->lock);
 
 	ttm_resource_manager_cleanup(man);
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);