diff mbox series

[1/3] drm/buddy: Fix contiguous memory allocation issues

Message ID 20230821101444.119601-1-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/buddy: Fix contiguous memory allocation issues | expand

Commit Message

Paneer Selvam, Arunpravin Aug. 21, 2023, 10:14 a.m. UTC
The way now contiguous requests are implemented such that
the size rounded up to power of 2 and the corresponding order
block picked from the freelist.

In addition to the older method, the new method will rounddown
the size to power of 2 and the corresponding order block picked
from the freelist. And for the remaining size we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size from peer/buddy
block is not free, we pick the next freelist block and repeat
the same method.

Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/drm_buddy.c | 253 ++++++++++++++++++++++++++++++++++--
 include/drm/drm_buddy.h     |   6 +-
 2 files changed, 248 insertions(+), 11 deletions(-)

Comments

Christian König Aug. 21, 2023, 11:16 a.m. UTC | #1
Am 21.08.23 um 12:14 schrieb Arunpravin Paneer Selvam:
> The way now contiguous requests are implemented such that
> the size rounded up to power of 2 and the corresponding order
> block picked from the freelist.
>
> In addition to the older method, the new method will rounddown
> the size to power of 2 and the corresponding order block picked
> from the freelist. And for the remaining size we traverse the
> tree and try to allocate either from the freelist block's buddy
> or from the peer block. If the remaining size from peer/buddy
> block is not free, we pick the next freelist block and repeat
> the same method.

I think it's worth mentioning that Xinhui tried something similar a few 
month ago, but that didn't looked like it would work. For this here I'm 
more confident.

Of hand the implementation looks clean to me, but Matthew or others 
which have more background in how the implementation works need to take 
a look as well.

Thanks,
Christian.

>
> Moved contiguous/alignment size computation part and trim
> function to the drm buddy manager.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c | 253 ++++++++++++++++++++++++++++++++++--
>   include/drm/drm_buddy.h     |   6 +-
>   2 files changed, 248 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 7098f125b54a..220f60c08a03 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>   	return __alloc_range(mm, &dfs, start, size, blocks);
>   }
>   
> +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
> +					       u64 size,
> +					       u64 min_block_size,
> +					       struct drm_buddy_block *block,
> +					       struct list_head *blocks)
> +{
> +	struct drm_buddy_block *buddy, *parent = NULL;
> +	u64 start, offset = 0;
> +	LIST_HEAD(dfs);
> +	int err;
> +
> +	if (!block)
> +		return -EINVAL;
> +
> +	buddy = __get_buddy(block);
> +	if (!buddy)
> +		return -ENOSPC;
> +
> +	if (drm_buddy_block_is_allocated(buddy))
> +		return -ENOSPC;
> +
> +	parent = block->parent;
> +	if (!parent)
> +		return -ENOSPC;
> +
> +	if (block->parent->right == block) {
> +		u64 remaining;
> +
> +		/* Compute the leftover size for allocation */
> +		remaining = max((size - drm_buddy_block_size(mm, buddy)),
> +				min_block_size);
> +		if (!IS_ALIGNED(remaining, min_block_size))
> +			remaining = round_up(remaining, min_block_size);
> +
> +		/* Check if remaining size is greater than buddy block size */
> +		if (drm_buddy_block_size(mm, buddy) < remaining)
> +			return -ENOSPC;
> +
> +		offset = drm_buddy_block_size(mm, buddy) - remaining;
> +	}
> +
> +	list_add(&parent->tmp_link, &dfs);
> +	start = drm_buddy_block_offset(parent) + offset;
> +
> +	err = __alloc_range(mm, &dfs, start, size, blocks);
> +	if (err)
> +		return -ENOSPC;
> +
> +	return 0;
> +}
> +
> +static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
> +					      u64 size,
> +					      u64 min_block_size,
> +					      struct drm_buddy_block *block,
> +					      struct list_head *blocks)
> +{
> +	struct drm_buddy_block *first, *peer, *tmp;
> +	struct drm_buddy_block *parent = NULL;
> +	u64 start, offset = 0;
> +	unsigned int order;
> +	LIST_HEAD(dfs);
> +	int err;
> +
> +	if (!block)
> +		return -EINVAL;
> +
> +	order = drm_buddy_block_order(block);
> +	/* Add freelist block to dfs list */
> +	list_add(&block->tmp_link, &dfs);
> +
> +	tmp = block;
> +	parent = block->parent;
> +	while (parent) {
> +		if (block->parent->left == block) {
> +			if (parent->left != tmp) {
> +				peer = parent->left;
> +				break;
> +			}
> +		} else {
> +			if (parent->right != tmp) {
> +				peer = parent->right;
> +				break;
> +			}
> +		}
> +
> +		tmp = parent;
> +		parent = tmp->parent;
> +	}
> +
> +	if (!parent)
> +		return -ENOSPC;
> +
> +	do {
> +		if (drm_buddy_block_is_allocated(peer))
> +			return -ENOSPC;
> +		/* Exit loop if peer block order is equal to block order */
> +		if (drm_buddy_block_order(peer) == order)
> +			break;
> +
> +		if (drm_buddy_block_is_split(peer)) {
> +			/* Traverse down to the block order level */
> +			if (block->parent->left == block)
> +				peer = peer->right;
> +			else
> +				peer = peer->left;
> +		} else {
> +			break;
> +		}
> +	} while (1);
> +
> +	if (block->parent->left == block) {
> +		u64 remaining;
> +
> +		/* Compute the leftover size for allocation */
> +		remaining = max((size - drm_buddy_block_size(mm, block)),
> +				min_block_size);
> +		if (!IS_ALIGNED(remaining, min_block_size))
> +			remaining = round_up(remaining, min_block_size);
> +
> +		/* Check if remaining size is greater than peer block size */
> +		if (drm_buddy_block_size(mm, peer) < remaining)
> +			return -ENOSPC;
> +
> +		offset = drm_buddy_block_size(mm, peer) - remaining;
> +		/* Add left peer block to dfs list */
> +		list_add(&peer->tmp_link, &dfs);
> +	} else {
> +		/* Add right peer block to dfs list */
> +		list_add_tail(&peer->tmp_link, &dfs);
> +	}
> +
> +	first = list_first_entry_or_null(&dfs,
> +					 struct drm_buddy_block,
> +					 tmp_link);
> +	if (!first)
> +		return -EINVAL;
> +
> +	start = drm_buddy_block_offset(first) + offset;
> +	err = __alloc_range(mm, &dfs, start, size, blocks);
> +	if (err)
> +		return -ENOSPC;
> +
> +	return 0;
> +}
> +
> +static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
> +					       u64 size,
> +					       u64 min_block_size,
> +					       struct list_head *blocks)
> +{
> +	struct drm_buddy_block *block;
> +	struct list_head *list;
> +	unsigned long pages;
> +	unsigned int order;
> +	u64 modify_size;
> +	int err;
> +
> +	modify_size = rounddown_pow_of_two(size);
> +	pages = modify_size >> ilog2(mm->chunk_size);
> +	order = fls(pages) - 1;
> +	if (order == 0)
> +		return -ENOSPC;
> +
> +	list = &mm->free_list[order];
> +	if (list_empty(list))
> +		return -ENOSPC;
> +
> +	list_for_each_entry_reverse(block, list, link) {
> +		/* Allocate contiguous blocks from the buddy */
> +		err = __alloc_contiguous_block_from_buddy(mm,
> +							  size,
> +							  min_block_size,
> +							  block,
> +							  blocks);
> +		if (!err)
> +			return 0;
> +
> +		/* Allocate contiguous blocks from tree traversal method */
> +		err = __alloc_contiguous_block_from_peer(mm,
> +							 size,
> +							 min_block_size,
> +							 block,
> +							 blocks);
> +		if (!err)
> +			return 0;
> +	}
> +
> +	return -ENOSPC;
> +}
> +
>   /**
>    * drm_buddy_block_trim - free unused pages
>    *
> @@ -645,7 +836,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>    * @start: start of the allowed range for this block
>    * @end: end of the allowed range for this block
>    * @size: size of the allocation
> - * @min_page_size: alignment of the allocation
> + * @min_block_size: alignment of the allocation
>    * @blocks: output list head to add allocated blocks
>    * @flags: DRM_BUDDY_*_ALLOCATION flags
>    *
> @@ -660,23 +851,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>    */
>   int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			   u64 start, u64 end, u64 size,
> -			   u64 min_page_size,
> +			   u64 min_block_size,
>   			   struct list_head *blocks,
>   			   unsigned long flags)
>   {
>   	struct drm_buddy_block *block = NULL;
> +	u64 original_size, original_min_size;
>   	unsigned int min_order, order;
> -	unsigned long pages;
>   	LIST_HEAD(allocated);
> +	unsigned long pages;
>   	int err;
>   
>   	if (size < mm->chunk_size)
>   		return -EINVAL;
>   
> -	if (min_page_size < mm->chunk_size)
> +	if (min_block_size < mm->chunk_size)
>   		return -EINVAL;
>   
> -	if (!is_power_of_2(min_page_size))
> +	if (!is_power_of_2(min_block_size))
>   		return -EINVAL;
>   
>   	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
> @@ -692,12 +884,21 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   	if (start + size == end)
>   		return __drm_buddy_alloc_range(mm, start, size, blocks);
>   
> -	if (!IS_ALIGNED(size, min_page_size))
> -		return -EINVAL;
> +	original_size = size;
> +	original_min_size = min_block_size;
> +
> +	/* Roundup the size to power of 2 */
> +	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
> +		size = roundup_pow_of_two(size);
> +		min_block_size = size;
> +	/* Align size value to min_block_size */
> +	} else if (!IS_ALIGNED(size, min_block_size)) {
> +		size = round_up(size, min_block_size);
> +	}
>   
>   	pages = size >> ilog2(mm->chunk_size);
>   	order = fls(pages) - 1;
> -	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>   
>   	do {
>   		order = min(order, (unsigned int)fls(pages) - 1);
> @@ -716,6 +917,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   				break;
>   
>   			if (order-- == min_order) {
> +				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
> +				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
> +					/*
> +					 * Try contiguous block allocation through
> +					 * tree traversal method
> +					 */
> +					return __drm_buddy_alloc_contiguous_blocks(mm,
> +										   original_size,
> +										   original_min_size,
> +										   blocks);
> +
>   				err = -ENOSPC;
>   				goto err_free;
>   			}
> @@ -732,6 +944,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			break;
>   	} while (1);
>   
> +	/* Trim the allocated block to the required size */
> +	if (original_size != size) {
> +		struct list_head *trim_list;
> +		LIST_HEAD(temp);
> +		u64 trim_size;
> +
> +		trim_list = &allocated;
> +		trim_size = original_size;
> +
> +		if (!list_is_singular(&allocated)) {
> +			block = list_last_entry(&allocated, typeof(*block), link);
> +			list_move(&block->link, &temp);
> +			trim_list = &temp;
> +			trim_size = drm_buddy_block_size(mm, block) -
> +				(size - original_size);
> +		}
> +
> +		drm_buddy_block_trim(mm,
> +				     trim_size,
> +				     trim_list);
> +
> +		if (!list_empty(&temp))
> +			list_splice_tail(trim_list, &allocated);
> +	}
> +
>   	list_splice_tail(&allocated, blocks);
>   	return 0;
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..a5b39fc01003 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -22,8 +22,9 @@
>   	start__ >= max__ || size__ > max__ - start__; \
>   })
>   
> -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
> +#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
> +#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
>   
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,
>   			   struct drm_buddy_block *block,
>   			   struct drm_printer *p);
> -
>   #endif
Matthew Auld Aug. 21, 2023, 5:46 p.m. UTC | #2
Hi,

On 21/08/2023 11:14, Arunpravin Paneer Selvam wrote:
> The way now contiguous requests are implemented such that
> the size rounded up to power of 2 and the corresponding order
> block picked from the freelist.
> 
> In addition to the older method, the new method will rounddown
> the size to power of 2 and the corresponding order block picked
> from the freelist. And for the remaining size we traverse the
> tree and try to allocate either from the freelist block's buddy
> or from the peer block. If the remaining size from peer/buddy
> block is not free, we pick the next freelist block and repeat
> the same method.
> 
> Moved contiguous/alignment size computation part and trim
> function to the drm buddy manager.

I think we should also mention somewhere what issue this is trying to 
solve. IIUC the roundup_power_of_two() might in some cases trigger 
-ENOSPC even though there might be enough free space, and so to help 
with that we introduce a try harder mechanism.

> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c | 253 ++++++++++++++++++++++++++++++++++--
>   include/drm/drm_buddy.h     |   6 +-
>   2 files changed, 248 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 7098f125b54a..220f60c08a03 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>   	return __alloc_range(mm, &dfs, start, size, blocks);
>   }
>   
> +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
> +					       u64 size,
> +					       u64 min_block_size,
> +					       struct drm_buddy_block *block,
> +					       struct list_head *blocks)
> +{
> +	struct drm_buddy_block *buddy, *parent = NULL;
> +	u64 start, offset = 0;
> +	LIST_HEAD(dfs);
> +	int err;
> +
> +	if (!block)
> +		return -EINVAL;
> +
> +	buddy = __get_buddy(block);
> +	if (!buddy)
> +		return -ENOSPC;
> +
> +	if (drm_buddy_block_is_allocated(buddy))
> +		return -ENOSPC;
> +
> +	parent = block->parent;
> +	if (!parent)
> +		return -ENOSPC;
> +
> +	if (block->parent->right == block) {
> +		u64 remaining;
> +
> +		/* Compute the leftover size for allocation */
> +		remaining = max((size - drm_buddy_block_size(mm, buddy)),
> +				min_block_size);
> +		if (!IS_ALIGNED(remaining, min_block_size))
> +			remaining = round_up(remaining, min_block_size);
> +
> +		/* Check if remaining size is greater than buddy block size */
> +		if (drm_buddy_block_size(mm, buddy) < remaining)
> +			return -ENOSPC;
> +
> +		offset = drm_buddy_block_size(mm, buddy) - remaining;
> +	}
> +
> +	list_add(&parent->tmp_link, &dfs);
> +	start = drm_buddy_block_offset(parent) + offset;
> +
> +	err = __alloc_range(mm, &dfs, start, size, blocks);
> +	if (err)
> +		return -ENOSPC;
> +
> +	return 0;
> +}
> +
> +static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
> +					      u64 size,
> +					      u64 min_block_size,
> +					      struct drm_buddy_block *block,
> +					      struct list_head *blocks)
> +{
> +	struct drm_buddy_block *first, *peer, *tmp;
> +	struct drm_buddy_block *parent = NULL;
> +	u64 start, offset = 0;
> +	unsigned int order;
> +	LIST_HEAD(dfs);
> +	int err;
> +
> +	if (!block)
> +		return -EINVAL;
> +
> +	order = drm_buddy_block_order(block);
> +	/* Add freelist block to dfs list */
> +	list_add(&block->tmp_link, &dfs);
> +
> +	tmp = block;
> +	parent = block->parent;
> +	while (parent) {
> +		if (block->parent->left == block) {
> +			if (parent->left != tmp) {
> +				peer = parent->left;
> +				break;
> +			}
> +		} else {
> +			if (parent->right != tmp) {
> +				peer = parent->right;
> +				break;
> +			}
> +		}
> +
> +		tmp = parent;
> +		parent = tmp->parent;
> +	}
> +
> +	if (!parent)
> +		return -ENOSPC;
> +
> +	do {
> +		if (drm_buddy_block_is_allocated(peer))
> +			return -ENOSPC;
> +		/* Exit loop if peer block order is equal to block order */
> +		if (drm_buddy_block_order(peer) == order)
> +			break;
> +
> +		if (drm_buddy_block_is_split(peer)) {
> +			/* Traverse down to the block order level */
> +			if (block->parent->left == block)
> +				peer = peer->right;
> +			else
> +				peer = peer->left;
> +		} else {
> +			break;
> +		}
> +	} while (1);
> +
> +	if (block->parent->left == block) {
> +		u64 remaining;
> +
> +		/* Compute the leftover size for allocation */
> +		remaining = max((size - drm_buddy_block_size(mm, block)),
> +				min_block_size);
> +		if (!IS_ALIGNED(remaining, min_block_size))
> +			remaining = round_up(remaining, min_block_size);
> +
> +		/* Check if remaining size is greater than peer block size */
> +		if (drm_buddy_block_size(mm, peer) < remaining)
> +			return -ENOSPC;
> +
> +		offset = drm_buddy_block_size(mm, peer) - remaining;
> +		/* Add left peer block to dfs list */
> +		list_add(&peer->tmp_link, &dfs);
> +	} else {
> +		/* Add right peer block to dfs list */
> +		list_add_tail(&peer->tmp_link, &dfs);
> +	}
> +
> +	first = list_first_entry_or_null(&dfs,
> +					 struct drm_buddy_block,
> +					 tmp_link);
> +	if (!first)
> +		return -EINVAL;
> +
> +	start = drm_buddy_block_offset(first) + offset;
> +	err = __alloc_range(mm, &dfs, start, size, blocks);
> +	if (err)
> +		return -ENOSPC;
> +
> +	return 0;
> +}
> +
> +static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
> +					       u64 size,
> +					       u64 min_block_size,
> +					       struct list_head *blocks)
> +{
> +	struct drm_buddy_block *block;
> +	struct list_head *list;
> +	unsigned long pages;
> +	unsigned int order;
> +	u64 modify_size;
> +	int err;
> +
> +	modify_size = rounddown_pow_of_two(size);
> +	pages = modify_size >> ilog2(mm->chunk_size);
> +	order = fls(pages) - 1;
> +	if (order == 0)
> +		return -ENOSPC;
> +
> +	list = &mm->free_list[order];
> +	if (list_empty(list))
> +		return -ENOSPC;
> +
> +	list_for_each_entry_reverse(block, list, link) {
> +		/* Allocate contiguous blocks from the buddy */
> +		err = __alloc_contiguous_block_from_buddy(mm,
> +							  size,
> +							  min_block_size,
> +							  block,
> +							  blocks);
> +		if (!err)
> +			return 0;
> +
> +		/* Allocate contiguous blocks from tree traversal method */
> +		err = __alloc_contiguous_block_from_peer(mm,
> +							 size,
> +							 min_block_size,
> +							 block,
> +							 blocks);
> +		if (!err)
> +			return 0;
> +	}
> +
> +	return -ENOSPC;
> +}

Wondering if this would be a lot simpler if we can tweak alloc_range() 
to support allocating as much as it can up to some size? If it runs out 
of space it still returns an error but doesn't actually free what it has 
successfully allocated. It then also tells us how much it allocated. We 
can then allocate the rhs first and then from whatever is left we can 
figure out the precise offset we need for the lhs? I think that looks 
sort of similar to what the above does, but here we can for the most 
part just re-use alloc_range()? So maybe something like:

__alloc_range(..., u64 *total_allocated_on_err)
{
     ....
     err_free:
         if (err == -ENOSPC && total_allocated_on_err)
             *total_allocated_on_err = total_allocated;
         else
             drm_buddy_free_list(mm, &allocated);
         return err;
}

alloc_contig_try_harder()
{
      ....
      list_for_each_entry_reverse(b, list, link) {
          .....

          rhs_offset = block_offset(b);
          err =  __drm_buddy_alloc_range(mm, rhs_offset,
                                         size, &filled,
                                         blocks);
          if (!err || err != -ENOSPC)
              break;

          lhs_size = size - filled;
          lhs_offset = block_offset(b) - lhs_size;
          err =  __drm_buddy_alloc_range(mm, lhs_offset,
                                         lhs_size, NULL,
                                         blocks_lhs);
          list_splice(blocks_lhs, blocks);

          ....
      }
}

?

> +
>   /**
>    * drm_buddy_block_trim - free unused pages
>    *
> @@ -645,7 +836,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>    * @start: start of the allowed range for this block
>    * @end: end of the allowed range for this block
>    * @size: size of the allocation
> - * @min_page_size: alignment of the allocation
> + * @min_block_size: alignment of the allocation
>    * @blocks: output list head to add allocated blocks
>    * @flags: DRM_BUDDY_*_ALLOCATION flags
>    *
> @@ -660,23 +851,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>    */
>   int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			   u64 start, u64 end, u64 size,
> -			   u64 min_page_size,
> +			   u64 min_block_size,
>   			   struct list_head *blocks,
>   			   unsigned long flags)
>   {
>   	struct drm_buddy_block *block = NULL;
> +	u64 original_size, original_min_size;
>   	unsigned int min_order, order;
> -	unsigned long pages;
>   	LIST_HEAD(allocated);
> +	unsigned long pages;
>   	int err;
>   
>   	if (size < mm->chunk_size)
>   		return -EINVAL;
>   
> -	if (min_page_size < mm->chunk_size)
> +	if (min_block_size < mm->chunk_size)
>   		return -EINVAL;
>   
> -	if (!is_power_of_2(min_page_size))
> +	if (!is_power_of_2(min_block_size))
>   		return -EINVAL;
>   
>   	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
> @@ -692,12 +884,21 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   	if (start + size == end)
>   		return __drm_buddy_alloc_range(mm, start, size, blocks);
>   
> -	if (!IS_ALIGNED(size, min_page_size))
> -		return -EINVAL;
> +	original_size = size;
> +	original_min_size = min_block_size;
> +
> +	/* Roundup the size to power of 2 */
> +	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
> +		size = roundup_pow_of_two(size);
> +		min_block_size = size;
> +	/* Align size value to min_block_size */
> +	} else if (!IS_ALIGNED(size, min_block_size)) {
> +		size = round_up(size, min_block_size);
> +	}
>   
>   	pages = size >> ilog2(mm->chunk_size);
>   	order = fls(pages) - 1;
> -	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>   
>   	do {
>   		order = min(order, (unsigned int)fls(pages) - 1);
> @@ -716,6 +917,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   				break;
>   
>   			if (order-- == min_order) {
> +				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
> +				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
> +					/*
> +					 * Try contiguous block allocation through
> +					 * tree traversal method
> +					 */
> +					return __drm_buddy_alloc_contiguous_blocks(mm,
> +										   original_size,
> +										   original_min_size,
> +										   blocks);
> +
>   				err = -ENOSPC;
>   				goto err_free;
>   			}
> @@ -732,6 +944,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			break;
>   	} while (1);
>   
> +	/* Trim the allocated block to the required size */
> +	if (original_size != size) {
> +		struct list_head *trim_list;
> +		LIST_HEAD(temp);
> +		u64 trim_size;
> +
> +		trim_list = &allocated;
> +		trim_size = original_size;
> +
> +		if (!list_is_singular(&allocated)) {
> +			block = list_last_entry(&allocated, typeof(*block), link);
> +			list_move(&block->link, &temp);
> +			trim_list = &temp;
> +			trim_size = drm_buddy_block_size(mm, block) -
> +				(size - original_size);
> +		}
> +
> +		drm_buddy_block_trim(mm,
> +				     trim_size,
> +				     trim_list);
> +
> +		if (!list_empty(&temp))
> +			list_splice_tail(trim_list, &allocated);
> +	}
> +
>   	list_splice_tail(&allocated, blocks);
>   	return 0;
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..a5b39fc01003 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -22,8 +22,9 @@
>   	start__ >= max__ || size__ > max__ - start__; \
>   })
>   
> -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
> +#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
> +#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
>   
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,
>   			   struct drm_buddy_block *block,
>   			   struct drm_printer *p);
> -
>   #endif
Paneer Selvam, Arunpravin Aug. 22, 2023, 2:01 p.m. UTC | #3
On 21/08/23 10:46, Matthew Auld wrote:
> Hi,
>
> On 21/08/2023 11:14, Arunpravin Paneer Selvam wrote:
>> The way now contiguous requests are implemented such that
>> the size rounded up to power of 2 and the corresponding order
>> block picked from the freelist.
>>
>> In addition to the older method, the new method will rounddown
>> the size to power of 2 and the corresponding order block picked
>> from the freelist. And for the remaining size we traverse the
>> tree and try to allocate either from the freelist block's buddy
>> or from the peer block. If the remaining size from peer/buddy
>> block is not free, we pick the next freelist block and repeat
>> the same method.
>>
>> Moved contiguous/alignment size computation part and trim
>> function to the drm buddy manager.
>
> I think we should also mention somewhere what issue this is trying to 
> solve. IIUC the roundup_power_of_two() might in some cases trigger 
> -ENOSPC even though there might be enough free space, and so to help 
> with that we introduce a try harder mechanism.
Yes, we are trying to solve the above issue. I will add the problem 
statement to the commit description.
>
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c | 253 ++++++++++++++++++++++++++++++++++--
>>   include/drm/drm_buddy.h     |   6 +-
>>   2 files changed, 248 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 7098f125b54a..220f60c08a03 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
>> drm_buddy *mm,
>>       return __alloc_range(mm, &dfs, start, size, blocks);
>>   }
>>   +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
>> +                           u64 size,
>> +                           u64 min_block_size,
>> +                           struct drm_buddy_block *block,
>> +                           struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *buddy, *parent = NULL;
>> +    u64 start, offset = 0;
>> +    LIST_HEAD(dfs);
>> +    int err;
>> +
>> +    if (!block)
>> +        return -EINVAL;
>> +
>> +    buddy = __get_buddy(block);
>> +    if (!buddy)
>> +        return -ENOSPC;
>> +
>> +    if (drm_buddy_block_is_allocated(buddy))
>> +        return -ENOSPC;
>> +
>> +    parent = block->parent;
>> +    if (!parent)
>> +        return -ENOSPC;
>> +
>> +    if (block->parent->right == block) {
>> +        u64 remaining;
>> +
>> +        /* Compute the leftover size for allocation */
>> +        remaining = max((size - drm_buddy_block_size(mm, buddy)),
>> +                min_block_size);
>> +        if (!IS_ALIGNED(remaining, min_block_size))
>> +            remaining = round_up(remaining, min_block_size);
>> +
>> +        /* Check if remaining size is greater than buddy block size */
>> +        if (drm_buddy_block_size(mm, buddy) < remaining)
>> +            return -ENOSPC;
>> +
>> +        offset = drm_buddy_block_size(mm, buddy) - remaining;
>> +    }
>> +
>> +    list_add(&parent->tmp_link, &dfs);
>> +    start = drm_buddy_block_offset(parent) + offset;
>> +
>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>> +    if (err)
>> +        return -ENOSPC;
>> +
>> +    return 0;
>> +}
>> +
>> +static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
>> +                          u64 size,
>> +                          u64 min_block_size,
>> +                          struct drm_buddy_block *block,
>> +                          struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *first, *peer, *tmp;
>> +    struct drm_buddy_block *parent = NULL;
>> +    u64 start, offset = 0;
>> +    unsigned int order;
>> +    LIST_HEAD(dfs);
>> +    int err;
>> +
>> +    if (!block)
>> +        return -EINVAL;
>> +
>> +    order = drm_buddy_block_order(block);
>> +    /* Add freelist block to dfs list */
>> +    list_add(&block->tmp_link, &dfs);
>> +
>> +    tmp = block;
>> +    parent = block->parent;
>> +    while (parent) {
>> +        if (block->parent->left == block) {
>> +            if (parent->left != tmp) {
>> +                peer = parent->left;
>> +                break;
>> +            }
>> +        } else {
>> +            if (parent->right != tmp) {
>> +                peer = parent->right;
>> +                break;
>> +            }
>> +        }
>> +
>> +        tmp = parent;
>> +        parent = tmp->parent;
>> +    }
>> +
>> +    if (!parent)
>> +        return -ENOSPC;
>> +
>> +    do {
>> +        if (drm_buddy_block_is_allocated(peer))
>> +            return -ENOSPC;
>> +        /* Exit loop if peer block order is equal to block order */
>> +        if (drm_buddy_block_order(peer) == order)
>> +            break;
>> +
>> +        if (drm_buddy_block_is_split(peer)) {
>> +            /* Traverse down to the block order level */
>> +            if (block->parent->left == block)
>> +                peer = peer->right;
>> +            else
>> +                peer = peer->left;
>> +        } else {
>> +            break;
>> +        }
>> +    } while (1);
>> +
>> +    if (block->parent->left == block) {
>> +        u64 remaining;
>> +
>> +        /* Compute the leftover size for allocation */
>> +        remaining = max((size - drm_buddy_block_size(mm, block)),
>> +                min_block_size);
>> +        if (!IS_ALIGNED(remaining, min_block_size))
>> +            remaining = round_up(remaining, min_block_size);
>> +
>> +        /* Check if remaining size is greater than peer block size */
>> +        if (drm_buddy_block_size(mm, peer) < remaining)
>> +            return -ENOSPC;
>> +
>> +        offset = drm_buddy_block_size(mm, peer) - remaining;
>> +        /* Add left peer block to dfs list */
>> +        list_add(&peer->tmp_link, &dfs);
>> +    } else {
>> +        /* Add right peer block to dfs list */
>> +        list_add_tail(&peer->tmp_link, &dfs);
>> +    }
>> +
>> +    first = list_first_entry_or_null(&dfs,
>> +                     struct drm_buddy_block,
>> +                     tmp_link);
>> +    if (!first)
>> +        return -EINVAL;
>> +
>> +    start = drm_buddy_block_offset(first) + offset;
>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>> +    if (err)
>> +        return -ENOSPC;
>> +
>> +    return 0;
>> +}
>> +
>> +static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
>> +                           u64 size,
>> +                           u64 min_block_size,
>> +                           struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *block;
>> +    struct list_head *list;
>> +    unsigned long pages;
>> +    unsigned int order;
>> +    u64 modify_size;
>> +    int err;
>> +
>> +    modify_size = rounddown_pow_of_two(size);
>> +    pages = modify_size >> ilog2(mm->chunk_size);
>> +    order = fls(pages) - 1;
>> +    if (order == 0)
>> +        return -ENOSPC;
>> +
>> +    list = &mm->free_list[order];
>> +    if (list_empty(list))
>> +        return -ENOSPC;
>> +
>> +    list_for_each_entry_reverse(block, list, link) {
>> +        /* Allocate contiguous blocks from the buddy */
>> +        err = __alloc_contiguous_block_from_buddy(mm,
>> +                              size,
>> +                              min_block_size,
>> +                              block,
>> +                              blocks);
>> +        if (!err)
>> +            return 0;
>> +
>> +        /* Allocate contiguous blocks from tree traversal method */
>> +        err = __alloc_contiguous_block_from_peer(mm,
>> +                             size,
>> +                             min_block_size,
>> +                             block,
>> +                             blocks);
>> +        if (!err)
>> +            return 0;
>> +    }
>> +
>> +    return -ENOSPC;
>> +}
>
> Wondering if this would be a lot simpler if we can tweak alloc_range() 
> to support allocating as much as it can up to some size? If it runs 
> out of space it still returns an error but doesn't actually free what 
> it has successfully allocated. It then also tells us how much it 
> allocated. We can then allocate the rhs first and then from whatever 
> is left we can figure out the precise offset we need for the lhs? I 
> think that looks sort of similar to what the above does, but here we 
> can for the most part just re-use alloc_range()? So maybe something like:
>
> __alloc_range(..., u64 *total_allocated_on_err)
> {
>     ....
>     err_free:
>         if (err == -ENOSPC && total_allocated_on_err)
>             *total_allocated_on_err = total_allocated;
>         else
>             drm_buddy_free_list(mm, &allocated);
>         return err;
> }
>
> alloc_contig_try_harder()
> {
>      ....
>      list_for_each_entry_reverse(b, list, link) {
>          .....
>
>          rhs_offset = block_offset(b);
>          err =  __drm_buddy_alloc_range(mm, rhs_offset,
>                                         size, &filled,
>                                         blocks);
>          if (!err || err != -ENOSPC)
>              break;
>
>          lhs_size = size - filled;
>          lhs_offset = block_offset(b) - lhs_size;
>          err =  __drm_buddy_alloc_range(mm, lhs_offset,
>                                         lhs_size, NULL,
>                                         blocks_lhs);
>          list_splice(blocks_lhs, blocks);
>
>          ....
>      }
> }
>
> ?
>
The difference between the above approach and this patch is that the 
above approach tries to allocate first from RHS and remaining size from 
LHS, but the patch tries to handle RHS and LHS separately through 
alloc_from_buddy() and alloc_from_peer() functions. I thought this would 
unblock the merge back operation of any one of the side (either LHS or 
RHS). If the above approach doesn't harm much, we will try and allocate 
from both the sides.

If we can add the immediate right block (i.e its buddy) to the dfs list 
for RHS traversal and if we could add only the immediate peer block for 
LHS traversal, this would eliminate the need for adding complete address 
space to the list and it might reduce the number of iterations.

Also, we should ALIGN the LHS remaining size to the min_block_size. 
Otherwise, I see glitches in some of the workloads.

>> +
>>   /**
>>    * drm_buddy_block_trim - free unused pages
>>    *
>> @@ -645,7 +836,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>    * @start: start of the allowed range for this block
>>    * @end: end of the allowed range for this block
>>    * @size: size of the allocation
>> - * @min_page_size: alignment of the allocation
>> + * @min_block_size: alignment of the allocation
>>    * @blocks: output list head to add allocated blocks
>>    * @flags: DRM_BUDDY_*_ALLOCATION flags
>>    *
>> @@ -660,23 +851,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>    */
>>   int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>                  u64 start, u64 end, u64 size,
>> -               u64 min_page_size,
>> +               u64 min_block_size,
>>                  struct list_head *blocks,
>>                  unsigned long flags)
>>   {
>>       struct drm_buddy_block *block = NULL;
>> +    u64 original_size, original_min_size;
>>       unsigned int min_order, order;
>> -    unsigned long pages;
>>       LIST_HEAD(allocated);
>> +    unsigned long pages;
>>       int err;
>>         if (size < mm->chunk_size)
>>           return -EINVAL;
>>   -    if (min_page_size < mm->chunk_size)
>> +    if (min_block_size < mm->chunk_size)
>>           return -EINVAL;
>>   -    if (!is_power_of_2(min_page_size))
>> +    if (!is_power_of_2(min_block_size))
>>           return -EINVAL;
>>         if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> @@ -692,12 +884,21 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>       if (start + size == end)
>>           return __drm_buddy_alloc_range(mm, start, size, blocks);
>>   -    if (!IS_ALIGNED(size, min_page_size))
>> -        return -EINVAL;
>> +    original_size = size;
>> +    original_min_size = min_block_size;
>> +
>> +    /* Roundup the size to power of 2 */
>> +    if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
>> +        size = roundup_pow_of_two(size);
>> +        min_block_size = size;
>> +    /* Align size value to min_block_size */
>> +    } else if (!IS_ALIGNED(size, min_block_size)) {
>> +        size = round_up(size, min_block_size);
>> +    }
>>         pages = size >> ilog2(mm->chunk_size);
>>       order = fls(pages) - 1;
>> -    min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +    min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>>         do {
>>           order = min(order, (unsigned int)fls(pages) - 1);
>> @@ -716,6 +917,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>                   break;
>>                 if (order-- == min_order) {
>> +                if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
>> +                    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
>> +                    /*
>> +                     * Try contiguous block allocation through
>> +                     * tree traversal method
>> +                     */
>> +                    return __drm_buddy_alloc_contiguous_blocks(mm,
>> +                                           original_size,
>> +                                           original_min_size,
>> +                                           blocks);
>> +
>>                   err = -ENOSPC;
>>                   goto err_free;
>>               }
>> @@ -732,6 +944,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>               break;
>>       } while (1);
>>   +    /* Trim the allocated block to the required size */
>> +    if (original_size != size) {
>> +        struct list_head *trim_list;
>> +        LIST_HEAD(temp);
>> +        u64 trim_size;
>> +
>> +        trim_list = &allocated;
>> +        trim_size = original_size;
>> +
>> +        if (!list_is_singular(&allocated)) {
>> +            block = list_last_entry(&allocated, typeof(*block), link);
>> +            list_move(&block->link, &temp);
>> +            trim_list = &temp;
>> +            trim_size = drm_buddy_block_size(mm, block) -
>> +                (size - original_size);
>> +        }
>> +
>> +        drm_buddy_block_trim(mm,
>> +                     trim_size,
>> +                     trim_list);
>> +
>> +        if (!list_empty(&temp))
>> +            list_splice_tail(trim_list, &allocated);
>> +    }
>> +
>>       list_splice_tail(&allocated, blocks);
>>       return 0;
>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 572077ff8ae7..a5b39fc01003 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -22,8 +22,9 @@
>>       start__ >= max__ || size__ > max__ - start__; \
>>   })
>>   -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
>> +#define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>> +#define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>     struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct 
>> drm_printer *p);
>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>                  struct drm_buddy_block *block,
>>                  struct drm_printer *p);
>> -
>>   #endif
Christian König Aug. 23, 2023, 5:52 a.m. UTC | #4
Am 21.08.23 um 13:16 schrieb Christian König:
> Am 21.08.23 um 12:14 schrieb Arunpravin Paneer Selvam:
>> The way now contiguous requests are implemented such that
>> the size rounded up to power of 2 and the corresponding order
>> block picked from the freelist.
>>
>> In addition to the older method, the new method will rounddown
>> the size to power of 2 and the corresponding order block picked
>> from the freelist. And for the remaining size we traverse the
>> tree and try to allocate either from the freelist block's buddy
>> or from the peer block. If the remaining size from peer/buddy
>> block is not free, we pick the next freelist block and repeat
>> the same method.
>
> I think it's worth mentioning that Xinhui tried something similar a 
> few month ago, but that didn't looked like it would work. For this 
> here I'm more confident.
>
> Of hand the implementation looks clean to me, but Matthew or others 
> which have more background in how the implementation works need to 
> take a look as well.

One more thing I've just noticed, not sure if Matthew already noted it: 
When you mention "fix" in the subject line people might try to backport 
it, better write "improve" and drop the "issues" at the end.

Regards,
Christian.

>
> Thanks,
> Christian.
>
>>
>> Moved contiguous/alignment size computation part and trim
>> function to the drm buddy manager.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c | 253 ++++++++++++++++++++++++++++++++++--
>>   include/drm/drm_buddy.h     |   6 +-
>>   2 files changed, 248 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 7098f125b54a..220f60c08a03 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
>> drm_buddy *mm,
>>       return __alloc_range(mm, &dfs, start, size, blocks);
>>   }
>>   +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
>> +                           u64 size,
>> +                           u64 min_block_size,
>> +                           struct drm_buddy_block *block,
>> +                           struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *buddy, *parent = NULL;
>> +    u64 start, offset = 0;
>> +    LIST_HEAD(dfs);
>> +    int err;
>> +
>> +    if (!block)
>> +        return -EINVAL;
>> +
>> +    buddy = __get_buddy(block);
>> +    if (!buddy)
>> +        return -ENOSPC;
>> +
>> +    if (drm_buddy_block_is_allocated(buddy))
>> +        return -ENOSPC;
>> +
>> +    parent = block->parent;
>> +    if (!parent)
>> +        return -ENOSPC;
>> +
>> +    if (block->parent->right == block) {
>> +        u64 remaining;
>> +
>> +        /* Compute the leftover size for allocation */
>> +        remaining = max((size - drm_buddy_block_size(mm, buddy)),
>> +                min_block_size);
>> +        if (!IS_ALIGNED(remaining, min_block_size))
>> +            remaining = round_up(remaining, min_block_size);
>> +
>> +        /* Check if remaining size is greater than buddy block size */
>> +        if (drm_buddy_block_size(mm, buddy) < remaining)
>> +            return -ENOSPC;
>> +
>> +        offset = drm_buddy_block_size(mm, buddy) - remaining;
>> +    }
>> +
>> +    list_add(&parent->tmp_link, &dfs);
>> +    start = drm_buddy_block_offset(parent) + offset;
>> +
>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>> +    if (err)
>> +        return -ENOSPC;
>> +
>> +    return 0;
>> +}
>> +
>> +static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
>> +                          u64 size,
>> +                          u64 min_block_size,
>> +                          struct drm_buddy_block *block,
>> +                          struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *first, *peer, *tmp;
>> +    struct drm_buddy_block *parent = NULL;
>> +    u64 start, offset = 0;
>> +    unsigned int order;
>> +    LIST_HEAD(dfs);
>> +    int err;
>> +
>> +    if (!block)
>> +        return -EINVAL;
>> +
>> +    order = drm_buddy_block_order(block);
>> +    /* Add freelist block to dfs list */
>> +    list_add(&block->tmp_link, &dfs);
>> +
>> +    tmp = block;
>> +    parent = block->parent;
>> +    while (parent) {
>> +        if (block->parent->left == block) {
>> +            if (parent->left != tmp) {
>> +                peer = parent->left;
>> +                break;
>> +            }
>> +        } else {
>> +            if (parent->right != tmp) {
>> +                peer = parent->right;
>> +                break;
>> +            }
>> +        }
>> +
>> +        tmp = parent;
>> +        parent = tmp->parent;
>> +    }
>> +
>> +    if (!parent)
>> +        return -ENOSPC;
>> +
>> +    do {
>> +        if (drm_buddy_block_is_allocated(peer))
>> +            return -ENOSPC;
>> +        /* Exit loop if peer block order is equal to block order */
>> +        if (drm_buddy_block_order(peer) == order)
>> +            break;
>> +
>> +        if (drm_buddy_block_is_split(peer)) {
>> +            /* Traverse down to the block order level */
>> +            if (block->parent->left == block)
>> +                peer = peer->right;
>> +            else
>> +                peer = peer->left;
>> +        } else {
>> +            break;
>> +        }
>> +    } while (1);
>> +
>> +    if (block->parent->left == block) {
>> +        u64 remaining;
>> +
>> +        /* Compute the leftover size for allocation */
>> +        remaining = max((size - drm_buddy_block_size(mm, block)),
>> +                min_block_size);
>> +        if (!IS_ALIGNED(remaining, min_block_size))
>> +            remaining = round_up(remaining, min_block_size);
>> +
>> +        /* Check if remaining size is greater than peer block size */
>> +        if (drm_buddy_block_size(mm, peer) < remaining)
>> +            return -ENOSPC;
>> +
>> +        offset = drm_buddy_block_size(mm, peer) - remaining;
>> +        /* Add left peer block to dfs list */
>> +        list_add(&peer->tmp_link, &dfs);
>> +    } else {
>> +        /* Add right peer block to dfs list */
>> +        list_add_tail(&peer->tmp_link, &dfs);
>> +    }
>> +
>> +    first = list_first_entry_or_null(&dfs,
>> +                     struct drm_buddy_block,
>> +                     tmp_link);
>> +    if (!first)
>> +        return -EINVAL;
>> +
>> +    start = drm_buddy_block_offset(first) + offset;
>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>> +    if (err)
>> +        return -ENOSPC;
>> +
>> +    return 0;
>> +}
>> +
>> +static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
>> +                           u64 size,
>> +                           u64 min_block_size,
>> +                           struct list_head *blocks)
>> +{
>> +    struct drm_buddy_block *block;
>> +    struct list_head *list;
>> +    unsigned long pages;
>> +    unsigned int order;
>> +    u64 modify_size;
>> +    int err;
>> +
>> +    modify_size = rounddown_pow_of_two(size);
>> +    pages = modify_size >> ilog2(mm->chunk_size);
>> +    order = fls(pages) - 1;
>> +    if (order == 0)
>> +        return -ENOSPC;
>> +
>> +    list = &mm->free_list[order];
>> +    if (list_empty(list))
>> +        return -ENOSPC;
>> +
>> +    list_for_each_entry_reverse(block, list, link) {
>> +        /* Allocate contiguous blocks from the buddy */
>> +        err = __alloc_contiguous_block_from_buddy(mm,
>> +                              size,
>> +                              min_block_size,
>> +                              block,
>> +                              blocks);
>> +        if (!err)
>> +            return 0;
>> +
>> +        /* Allocate contiguous blocks from tree traversal method */
>> +        err = __alloc_contiguous_block_from_peer(mm,
>> +                             size,
>> +                             min_block_size,
>> +                             block,
>> +                             blocks);
>> +        if (!err)
>> +            return 0;
>> +    }
>> +
>> +    return -ENOSPC;
>> +}
>> +
>>   /**
>>    * drm_buddy_block_trim - free unused pages
>>    *
>> @@ -645,7 +836,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>    * @start: start of the allowed range for this block
>>    * @end: end of the allowed range for this block
>>    * @size: size of the allocation
>> - * @min_page_size: alignment of the allocation
>> + * @min_block_size: alignment of the allocation
>>    * @blocks: output list head to add allocated blocks
>>    * @flags: DRM_BUDDY_*_ALLOCATION flags
>>    *
>> @@ -660,23 +851,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>    */
>>   int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>                  u64 start, u64 end, u64 size,
>> -               u64 min_page_size,
>> +               u64 min_block_size,
>>                  struct list_head *blocks,
>>                  unsigned long flags)
>>   {
>>       struct drm_buddy_block *block = NULL;
>> +    u64 original_size, original_min_size;
>>       unsigned int min_order, order;
>> -    unsigned long pages;
>>       LIST_HEAD(allocated);
>> +    unsigned long pages;
>>       int err;
>>         if (size < mm->chunk_size)
>>           return -EINVAL;
>>   -    if (min_page_size < mm->chunk_size)
>> +    if (min_block_size < mm->chunk_size)
>>           return -EINVAL;
>>   -    if (!is_power_of_2(min_page_size))
>> +    if (!is_power_of_2(min_block_size))
>>           return -EINVAL;
>>         if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> @@ -692,12 +884,21 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>       if (start + size == end)
>>           return __drm_buddy_alloc_range(mm, start, size, blocks);
>>   -    if (!IS_ALIGNED(size, min_page_size))
>> -        return -EINVAL;
>> +    original_size = size;
>> +    original_min_size = min_block_size;
>> +
>> +    /* Roundup the size to power of 2 */
>> +    if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
>> +        size = roundup_pow_of_two(size);
>> +        min_block_size = size;
>> +    /* Align size value to min_block_size */
>> +    } else if (!IS_ALIGNED(size, min_block_size)) {
>> +        size = round_up(size, min_block_size);
>> +    }
>>         pages = size >> ilog2(mm->chunk_size);
>>       order = fls(pages) - 1;
>> -    min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +    min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>>         do {
>>           order = min(order, (unsigned int)fls(pages) - 1);
>> @@ -716,6 +917,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>                   break;
>>                 if (order-- == min_order) {
>> +                if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
>> +                    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
>> +                    /*
>> +                     * Try contiguous block allocation through
>> +                     * tree traversal method
>> +                     */
>> +                    return __drm_buddy_alloc_contiguous_blocks(mm,
>> +                                           original_size,
>> +                                           original_min_size,
>> +                                           blocks);
>> +
>>                   err = -ENOSPC;
>>                   goto err_free;
>>               }
>> @@ -732,6 +944,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>               break;
>>       } while (1);
>>   +    /* Trim the allocated block to the required size */
>> +    if (original_size != size) {
>> +        struct list_head *trim_list;
>> +        LIST_HEAD(temp);
>> +        u64 trim_size;
>> +
>> +        trim_list = &allocated;
>> +        trim_size = original_size;
>> +
>> +        if (!list_is_singular(&allocated)) {
>> +            block = list_last_entry(&allocated, typeof(*block), link);
>> +            list_move(&block->link, &temp);
>> +            trim_list = &temp;
>> +            trim_size = drm_buddy_block_size(mm, block) -
>> +                (size - original_size);
>> +        }
>> +
>> +        drm_buddy_block_trim(mm,
>> +                     trim_size,
>> +                     trim_list);
>> +
>> +        if (!list_empty(&temp))
>> +            list_splice_tail(trim_list, &allocated);
>> +    }
>> +
>>       list_splice_tail(&allocated, blocks);
>>       return 0;
>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 572077ff8ae7..a5b39fc01003 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -22,8 +22,9 @@
>>       start__ >= max__ || size__ > max__ - start__; \
>>   })
>>   -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
>> +#define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>> +#define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>     struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, struct 
>> drm_printer *p);
>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>                  struct drm_buddy_block *block,
>>                  struct drm_printer *p);
>> -
>>   #endif
>
Paneer Selvam, Arunpravin Aug. 23, 2023, 6:35 a.m. UTC | #5
On 22/08/23 22:52, Christian König wrote:
> Am 21.08.23 um 13:16 schrieb Christian König:
>> Am 21.08.23 um 12:14 schrieb Arunpravin Paneer Selvam:
>>> The way now contiguous requests are implemented such that
>>> the size rounded up to power of 2 and the corresponding order
>>> block picked from the freelist.
>>>
>>> In addition to the older method, the new method will rounddown
>>> the size to power of 2 and the corresponding order block picked
>>> from the freelist. And for the remaining size we traverse the
>>> tree and try to allocate either from the freelist block's buddy
>>> or from the peer block. If the remaining size from peer/buddy
>>> block is not free, we pick the next freelist block and repeat
>>> the same method.
>>
>> I think it's worth mentioning that Xinhui tried something similar a 
>> few month ago, but that didn't looked like it would work. For this 
>> here I'm more confident.
>>
>> Of hand the implementation looks clean to me, but Matthew or others 
>> which have more background in how the implementation works need to 
>> take a look as well.
>
> One more thing I've just noticed, not sure if Matthew already noted 
> it: When you mention "fix" in the subject line people might try to 
> backport it, better write "improve" and drop the "issues" at the end.

I will modify in the next version.

Thanks,
Arun.

>
> Regards,
> Christian.
>
>>
>> Thanks,
>> Christian.
>>
>>>
>>> Moved contiguous/alignment size computation part and trim
>>> function to the drm buddy manager.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> ---
>>>   drivers/gpu/drm/drm_buddy.c | 253 
>>> ++++++++++++++++++++++++++++++++++--
>>>   include/drm/drm_buddy.h     |   6 +-
>>>   2 files changed, 248 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>> index 7098f125b54a..220f60c08a03 100644
>>> --- a/drivers/gpu/drm/drm_buddy.c
>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>> @@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
>>> drm_buddy *mm,
>>>       return __alloc_range(mm, &dfs, start, size, blocks);
>>>   }
>>>   +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
>>> +                           u64 size,
>>> +                           u64 min_block_size,
>>> +                           struct drm_buddy_block *block,
>>> +                           struct list_head *blocks)
>>> +{
>>> +    struct drm_buddy_block *buddy, *parent = NULL;
>>> +    u64 start, offset = 0;
>>> +    LIST_HEAD(dfs);
>>> +    int err;
>>> +
>>> +    if (!block)
>>> +        return -EINVAL;
>>> +
>>> +    buddy = __get_buddy(block);
>>> +    if (!buddy)
>>> +        return -ENOSPC;
>>> +
>>> +    if (drm_buddy_block_is_allocated(buddy))
>>> +        return -ENOSPC;
>>> +
>>> +    parent = block->parent;
>>> +    if (!parent)
>>> +        return -ENOSPC;
>>> +
>>> +    if (block->parent->right == block) {
>>> +        u64 remaining;
>>> +
>>> +        /* Compute the leftover size for allocation */
>>> +        remaining = max((size - drm_buddy_block_size(mm, buddy)),
>>> +                min_block_size);
>>> +        if (!IS_ALIGNED(remaining, min_block_size))
>>> +            remaining = round_up(remaining, min_block_size);
>>> +
>>> +        /* Check if remaining size is greater than buddy block size */
>>> +        if (drm_buddy_block_size(mm, buddy) < remaining)
>>> +            return -ENOSPC;
>>> +
>>> +        offset = drm_buddy_block_size(mm, buddy) - remaining;
>>> +    }
>>> +
>>> +    list_add(&parent->tmp_link, &dfs);
>>> +    start = drm_buddy_block_offset(parent) + offset;
>>> +
>>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>>> +    if (err)
>>> +        return -ENOSPC;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
>>> +                          u64 size,
>>> +                          u64 min_block_size,
>>> +                          struct drm_buddy_block *block,
>>> +                          struct list_head *blocks)
>>> +{
>>> +    struct drm_buddy_block *first, *peer, *tmp;
>>> +    struct drm_buddy_block *parent = NULL;
>>> +    u64 start, offset = 0;
>>> +    unsigned int order;
>>> +    LIST_HEAD(dfs);
>>> +    int err;
>>> +
>>> +    if (!block)
>>> +        return -EINVAL;
>>> +
>>> +    order = drm_buddy_block_order(block);
>>> +    /* Add freelist block to dfs list */
>>> +    list_add(&block->tmp_link, &dfs);
>>> +
>>> +    tmp = block;
>>> +    parent = block->parent;
>>> +    while (parent) {
>>> +        if (block->parent->left == block) {
>>> +            if (parent->left != tmp) {
>>> +                peer = parent->left;
>>> +                break;
>>> +            }
>>> +        } else {
>>> +            if (parent->right != tmp) {
>>> +                peer = parent->right;
>>> +                break;
>>> +            }
>>> +        }
>>> +
>>> +        tmp = parent;
>>> +        parent = tmp->parent;
>>> +    }
>>> +
>>> +    if (!parent)
>>> +        return -ENOSPC;
>>> +
>>> +    do {
>>> +        if (drm_buddy_block_is_allocated(peer))
>>> +            return -ENOSPC;
>>> +        /* Exit loop if peer block order is equal to block order */
>>> +        if (drm_buddy_block_order(peer) == order)
>>> +            break;
>>> +
>>> +        if (drm_buddy_block_is_split(peer)) {
>>> +            /* Traverse down to the block order level */
>>> +            if (block->parent->left == block)
>>> +                peer = peer->right;
>>> +            else
>>> +                peer = peer->left;
>>> +        } else {
>>> +            break;
>>> +        }
>>> +    } while (1);
>>> +
>>> +    if (block->parent->left == block) {
>>> +        u64 remaining;
>>> +
>>> +        /* Compute the leftover size for allocation */
>>> +        remaining = max((size - drm_buddy_block_size(mm, block)),
>>> +                min_block_size);
>>> +        if (!IS_ALIGNED(remaining, min_block_size))
>>> +            remaining = round_up(remaining, min_block_size);
>>> +
>>> +        /* Check if remaining size is greater than peer block size */
>>> +        if (drm_buddy_block_size(mm, peer) < remaining)
>>> +            return -ENOSPC;
>>> +
>>> +        offset = drm_buddy_block_size(mm, peer) - remaining;
>>> +        /* Add left peer block to dfs list */
>>> +        list_add(&peer->tmp_link, &dfs);
>>> +    } else {
>>> +        /* Add right peer block to dfs list */
>>> +        list_add_tail(&peer->tmp_link, &dfs);
>>> +    }
>>> +
>>> +    first = list_first_entry_or_null(&dfs,
>>> +                     struct drm_buddy_block,
>>> +                     tmp_link);
>>> +    if (!first)
>>> +        return -EINVAL;
>>> +
>>> +    start = drm_buddy_block_offset(first) + offset;
>>> +    err = __alloc_range(mm, &dfs, start, size, blocks);
>>> +    if (err)
>>> +        return -ENOSPC;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
>>> +                           u64 size,
>>> +                           u64 min_block_size,
>>> +                           struct list_head *blocks)
>>> +{
>>> +    struct drm_buddy_block *block;
>>> +    struct list_head *list;
>>> +    unsigned long pages;
>>> +    unsigned int order;
>>> +    u64 modify_size;
>>> +    int err;
>>> +
>>> +    modify_size = rounddown_pow_of_two(size);
>>> +    pages = modify_size >> ilog2(mm->chunk_size);
>>> +    order = fls(pages) - 1;
>>> +    if (order == 0)
>>> +        return -ENOSPC;
>>> +
>>> +    list = &mm->free_list[order];
>>> +    if (list_empty(list))
>>> +        return -ENOSPC;
>>> +
>>> +    list_for_each_entry_reverse(block, list, link) {
>>> +        /* Allocate contiguous blocks from the buddy */
>>> +        err = __alloc_contiguous_block_from_buddy(mm,
>>> +                              size,
>>> +                              min_block_size,
>>> +                              block,
>>> +                              blocks);
>>> +        if (!err)
>>> +            return 0;
>>> +
>>> +        /* Allocate contiguous blocks from tree traversal method */
>>> +        err = __alloc_contiguous_block_from_peer(mm,
>>> +                             size,
>>> +                             min_block_size,
>>> +                             block,
>>> +                             blocks);
>>> +        if (!err)
>>> +            return 0;
>>> +    }
>>> +
>>> +    return -ENOSPC;
>>> +}
>>> +
>>>   /**
>>>    * drm_buddy_block_trim - free unused pages
>>>    *
>>> @@ -645,7 +836,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>>    * @start: start of the allowed range for this block
>>>    * @end: end of the allowed range for this block
>>>    * @size: size of the allocation
>>> - * @min_page_size: alignment of the allocation
>>> + * @min_block_size: alignment of the allocation
>>>    * @blocks: output list head to add allocated blocks
>>>    * @flags: DRM_BUDDY_*_ALLOCATION flags
>>>    *
>>> @@ -660,23 +851,24 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
>>>    */
>>>   int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>                  u64 start, u64 end, u64 size,
>>> -               u64 min_page_size,
>>> +               u64 min_block_size,
>>>                  struct list_head *blocks,
>>>                  unsigned long flags)
>>>   {
>>>       struct drm_buddy_block *block = NULL;
>>> +    u64 original_size, original_min_size;
>>>       unsigned int min_order, order;
>>> -    unsigned long pages;
>>>       LIST_HEAD(allocated);
>>> +    unsigned long pages;
>>>       int err;
>>>         if (size < mm->chunk_size)
>>>           return -EINVAL;
>>>   -    if (min_page_size < mm->chunk_size)
>>> +    if (min_block_size < mm->chunk_size)
>>>           return -EINVAL;
>>>   -    if (!is_power_of_2(min_page_size))
>>> +    if (!is_power_of_2(min_block_size))
>>>           return -EINVAL;
>>>         if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>>> @@ -692,12 +884,21 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>       if (start + size == end)
>>>           return __drm_buddy_alloc_range(mm, start, size, blocks);
>>>   -    if (!IS_ALIGNED(size, min_page_size))
>>> -        return -EINVAL;
>>> +    original_size = size;
>>> +    original_min_size = min_block_size;
>>> +
>>> +    /* Roundup the size to power of 2 */
>>> +    if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
>>> +        size = roundup_pow_of_two(size);
>>> +        min_block_size = size;
>>> +    /* Align size value to min_block_size */
>>> +    } else if (!IS_ALIGNED(size, min_block_size)) {
>>> +        size = round_up(size, min_block_size);
>>> +    }
>>>         pages = size >> ilog2(mm->chunk_size);
>>>       order = fls(pages) - 1;
>>> -    min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>>> +    min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
>>>         do {
>>>           order = min(order, (unsigned int)fls(pages) - 1);
>>> @@ -716,6 +917,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>                   break;
>>>                 if (order-- == min_order) {
>>> +                if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
>>> +                    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
>>> +                    /*
>>> +                     * Try contiguous block allocation through
>>> +                     * tree traversal method
>>> +                     */
>>> +                    return __drm_buddy_alloc_contiguous_blocks(mm,
>>> +                                           original_size,
>>> +                                           original_min_size,
>>> +                                           blocks);
>>> +
>>>                   err = -ENOSPC;
>>>                   goto err_free;
>>>               }
>>> @@ -732,6 +944,31 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>               break;
>>>       } while (1);
>>>   +    /* Trim the allocated block to the required size */
>>> +    if (original_size != size) {
>>> +        struct list_head *trim_list;
>>> +        LIST_HEAD(temp);
>>> +        u64 trim_size;
>>> +
>>> +        trim_list = &allocated;
>>> +        trim_size = original_size;
>>> +
>>> +        if (!list_is_singular(&allocated)) {
>>> +            block = list_last_entry(&allocated, typeof(*block), link);
>>> +            list_move(&block->link, &temp);
>>> +            trim_list = &temp;
>>> +            trim_size = drm_buddy_block_size(mm, block) -
>>> +                (size - original_size);
>>> +        }
>>> +
>>> +        drm_buddy_block_trim(mm,
>>> +                     trim_size,
>>> +                     trim_list);
>>> +
>>> +        if (!list_empty(&temp))
>>> +            list_splice_tail(trim_list, &allocated);
>>> +    }
>>> +
>>>       list_splice_tail(&allocated, blocks);
>>>       return 0;
>>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index 572077ff8ae7..a5b39fc01003 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -22,8 +22,9 @@
>>>       start__ >= max__ || size__ > max__ - start__; \
>>>   })
>>>   -#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>>> -#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
>>> +#define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>> +#define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>> +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>>     struct drm_buddy_block {
>>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>> @@ -155,5 +156,4 @@ void drm_buddy_print(struct drm_buddy *mm, 
>>> struct drm_printer *p);
>>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>>                  struct drm_buddy_block *block,
>>>                  struct drm_printer *p);
>>> -
>>>   #endif
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@  static int __drm_buddy_alloc_range(struct drm_buddy *mm,
 	return __alloc_range(mm, &dfs, start, size, blocks);
 }
 
+static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
+					       u64 size,
+					       u64 min_block_size,
+					       struct drm_buddy_block *block,
+					       struct list_head *blocks)
+{
+	struct drm_buddy_block *buddy, *parent = NULL;
+	u64 start, offset = 0;
+	LIST_HEAD(dfs);
+	int err;
+
+	if (!block)
+		return -EINVAL;
+
+	buddy = __get_buddy(block);
+	if (!buddy)
+		return -ENOSPC;
+
+	if (drm_buddy_block_is_allocated(buddy))
+		return -ENOSPC;
+
+	parent = block->parent;
+	if (!parent)
+		return -ENOSPC;
+
+	if (block->parent->right == block) {
+		u64 remaining;
+
+		/* Compute the leftover size for allocation */
+		remaining = max((size - drm_buddy_block_size(mm, buddy)),
+				min_block_size);
+		if (!IS_ALIGNED(remaining, min_block_size))
+			remaining = round_up(remaining, min_block_size);
+
+		/* Check if remaining size is greater than buddy block size */
+		if (drm_buddy_block_size(mm, buddy) < remaining)
+			return -ENOSPC;
+
+		offset = drm_buddy_block_size(mm, buddy) - remaining;
+	}
+
+	list_add(&parent->tmp_link, &dfs);
+	start = drm_buddy_block_offset(parent) + offset;
+
+	err = __alloc_range(mm, &dfs, start, size, blocks);
+	if (err)
+		return -ENOSPC;
+
+	return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+					      u64 size,
+					      u64 min_block_size,
+					      struct drm_buddy_block *block,
+					      struct list_head *blocks)
+{
+	struct drm_buddy_block *first, *peer, *tmp;
+	struct drm_buddy_block *parent = NULL;
+	u64 start, offset = 0;
+	unsigned int order;
+	LIST_HEAD(dfs);
+	int err;
+
+	if (!block)
+		return -EINVAL;
+
+	order = drm_buddy_block_order(block);
+	/* Add freelist block to dfs list */
+	list_add(&block->tmp_link, &dfs);
+
+	tmp = block;
+	parent = block->parent;
+	while (parent) {
+		if (block->parent->left == block) {
+			if (parent->left != tmp) {
+				peer = parent->left;
+				break;
+			}
+		} else {
+			if (parent->right != tmp) {
+				peer = parent->right;
+				break;
+			}
+		}
+
+		tmp = parent;
+		parent = tmp->parent;
+	}
+
+	if (!parent)
+		return -ENOSPC;
+
+	do {
+		if (drm_buddy_block_is_allocated(peer))
+			return -ENOSPC;
+		/* Exit loop if peer block order is equal to block order */
+		if (drm_buddy_block_order(peer) == order)
+			break;
+
+		if (drm_buddy_block_is_split(peer)) {
+			/* Traverse down to the block order level */
+			if (block->parent->left == block)
+				peer = peer->right;
+			else
+				peer = peer->left;
+		} else {
+			break;
+		}
+	} while (1);
+
+	if (block->parent->left == block) {
+		u64 remaining;
+
+		/* Compute the leftover size for allocation */
+		remaining = max((size - drm_buddy_block_size(mm, block)),
+				min_block_size);
+		if (!IS_ALIGNED(remaining, min_block_size))
+			remaining = round_up(remaining, min_block_size);
+
+		/* Check if remaining size is greater than peer block size */
+		if (drm_buddy_block_size(mm, peer) < remaining)
+			return -ENOSPC;
+
+		offset = drm_buddy_block_size(mm, peer) - remaining;
+		/* Add left peer block to dfs list */
+		list_add(&peer->tmp_link, &dfs);
+	} else {
+		/* Add right peer block to dfs list */
+		list_add_tail(&peer->tmp_link, &dfs);
+	}
+
+	first = list_first_entry_or_null(&dfs,
+					 struct drm_buddy_block,
+					 tmp_link);
+	if (!first)
+		return -EINVAL;
+
+	start = drm_buddy_block_offset(first) + offset;
+	err = __alloc_range(mm, &dfs, start, size, blocks);
+	if (err)
+		return -ENOSPC;
+
+	return 0;
+}
+
+static int __drm_buddy_alloc_contiguous_blocks(struct drm_buddy *mm,
+					       u64 size,
+					       u64 min_block_size,
+					       struct list_head *blocks)
+{
+	struct drm_buddy_block *block;
+	struct list_head *list;
+	unsigned long pages;
+	unsigned int order;
+	u64 modify_size;
+	int err;
+
+	modify_size = rounddown_pow_of_two(size);
+	pages = modify_size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+	if (order == 0)
+		return -ENOSPC;
+
+	list = &mm->free_list[order];
+	if (list_empty(list))
+		return -ENOSPC;
+
+	list_for_each_entry_reverse(block, list, link) {
+		/* Allocate contiguous blocks from the buddy */
+		err = __alloc_contiguous_block_from_buddy(mm,
+							  size,
+							  min_block_size,
+							  block,
+							  blocks);
+		if (!err)
+			return 0;
+
+		/* Allocate contiguous blocks from tree traversal method */
+		err = __alloc_contiguous_block_from_peer(mm,
+							 size,
+							 min_block_size,
+							 block,
+							 blocks);
+		if (!err)
+			return 0;
+	}
+
+	return -ENOSPC;
+}
+
 /**
  * drm_buddy_block_trim - free unused pages
  *
@@ -645,7 +836,7 @@  EXPORT_SYMBOL(drm_buddy_block_trim);
  * @start: start of the allowed range for this block
  * @end: end of the allowed range for this block
  * @size: size of the allocation
- * @min_page_size: alignment of the allocation
+ * @min_block_size: alignment of the allocation
  * @blocks: output list head to add allocated blocks
  * @flags: DRM_BUDDY_*_ALLOCATION flags
  *
@@ -660,23 +851,24 @@  EXPORT_SYMBOL(drm_buddy_block_trim);
  */
 int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			   u64 start, u64 end, u64 size,
-			   u64 min_page_size,
+			   u64 min_block_size,
 			   struct list_head *blocks,
 			   unsigned long flags)
 {
 	struct drm_buddy_block *block = NULL;
+	u64 original_size, original_min_size;
 	unsigned int min_order, order;
-	unsigned long pages;
 	LIST_HEAD(allocated);
+	unsigned long pages;
 	int err;
 
 	if (size < mm->chunk_size)
 		return -EINVAL;
 
-	if (min_page_size < mm->chunk_size)
+	if (min_block_size < mm->chunk_size)
 		return -EINVAL;
 
-	if (!is_power_of_2(min_page_size))
+	if (!is_power_of_2(min_block_size))
 		return -EINVAL;
 
 	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
@@ -692,12 +884,21 @@  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	if (start + size == end)
 		return __drm_buddy_alloc_range(mm, start, size, blocks);
 
-	if (!IS_ALIGNED(size, min_page_size))
-		return -EINVAL;
+	original_size = size;
+	original_min_size = min_block_size;
+
+	/* Roundup the size to power of 2 */
+	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+		size = roundup_pow_of_two(size);
+		min_block_size = size;
+	/* Align size value to min_block_size */
+	} else if (!IS_ALIGNED(size, min_block_size)) {
+		size = round_up(size, min_block_size);
+	}
 
 	pages = size >> ilog2(mm->chunk_size);
 	order = fls(pages) - 1;
-	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
 
 	do {
 		order = min(order, (unsigned int)fls(pages) - 1);
@@ -716,6 +917,17 @@  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 				break;
 
 			if (order-- == min_order) {
+				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
+				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+					/*
+					 * Try contiguous block allocation through
+					 * tree traversal method
+					 */
+					return __drm_buddy_alloc_contiguous_blocks(mm,
+										   original_size,
+										   original_min_size,
+										   blocks);
+
 				err = -ENOSPC;
 				goto err_free;
 			}
@@ -732,6 +944,31 @@  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			break;
 	} while (1);
 
+	/* Trim the allocated block to the required size */
+	if (original_size != size) {
+		struct list_head *trim_list;
+		LIST_HEAD(temp);
+		u64 trim_size;
+
+		trim_list = &allocated;
+		trim_size = original_size;
+
+		if (!list_is_singular(&allocated)) {
+			block = list_last_entry(&allocated, typeof(*block), link);
+			list_move(&block->link, &temp);
+			trim_list = &temp;
+			trim_size = drm_buddy_block_size(mm, block) -
+				(size - original_size);
+		}
+
+		drm_buddy_block_trim(mm,
+				     trim_size,
+				     trim_list);
+
+		if (!list_empty(&temp))
+			list_splice_tail(trim_list, &allocated);
+	}
+
 	list_splice_tail(&allocated, blocks);
 	return 0;
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 572077ff8ae7..a5b39fc01003 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -22,8 +22,9 @@ 
 	start__ >= max__ || size__ > max__ - start__; \
 })
 
-#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
+#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
 
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
@@ -155,5 +156,4 @@  void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
 			   struct drm_buddy_block *block,
 			   struct drm_printer *p);
-
 #endif