diff mbox series

[v3,4/6] drm: implement a method to free unused pages

Message ID 20211116201807.147486-4-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/6] drm: move the buddy allocator from i915 into common drm | expand

Commit Message

Paneer Selvam, Arunpravin Nov. 16, 2021, 8:18 p.m. UTC
On contiguous allocation, we round up the size
to the *next* power of 2, implement a function
to free the unused pages after the newly allocate block.

v2(Matthew Auld):
  - replace function name 'drm_buddy_free_unused_pages' with
    drm_buddy_block_trim
  - replace input argument name 'actual_size' with 'new_size'
  - add more validation checks for input arguments
  - add overlaps check to avoid needless searching and splitting
  - merged the below patch to see the feature in action
    - add free unused pages support to i915 driver
  - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
    are all globally visible

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/drm_buddy.c                   | 108 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  10 ++
 include/drm/drm_buddy.h                       |   4 +
 3 files changed, 122 insertions(+)

Comments

Matthew Auld Nov. 17, 2021, 7:02 p.m. UTC | #1
On 16/11/2021 20:18, Arunpravin wrote:
> On contiguous allocation, we round up the size
> to the *next* power of 2, implement a function
> to free the unused pages after the newly allocate block.
> 
> v2(Matthew Auld):
>    - replace function name 'drm_buddy_free_unused_pages' with
>      drm_buddy_block_trim
>    - replace input argument name 'actual_size' with 'new_size'
>    - add more validation checks for input arguments
>    - add overlaps check to avoid needless searching and splitting
>    - merged the below patch to see the feature in action
>      - add free unused pages support to i915 driver
>    - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
>      are all globally visible
> 
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c                   | 108 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  10 ++
>   include/drm/drm_buddy.h                       |   4 +
>   3 files changed, 122 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 0a9db2981188..943fe2ad27bf 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -284,6 +284,114 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>   	return s1 <= s2 && e1 >= e2;
>   }
>   
> +/**
> + * drm_buddy_block_trim - free unused pages
> + *
> + * @mm: DRM buddy manager
> + * @new_size: original size requested
> + * @blocks: output list head to add allocated blocks
> + *
> + * For contiguous allocation, we round up the size to the nearest
> + * power of two value, drivers consume *actual* size, so remaining
> + * portions are unused and it can be freed.
> + *
> + * Returns:
> + * 0 on success, error code on failure.
> + */
> +int drm_buddy_block_trim(struct drm_buddy_mm *mm,
> +			 u64 new_size,
> +			 struct list_head *blocks)
> +{
> +	struct drm_buddy_block *block;
> +	struct drm_buddy_block *buddy;
> +	u64 new_start;
> +	u64 new_end;
> +	LIST_HEAD(dfs);
> +	u64 count = 0;
> +	int err;
> +
> +	if (!list_is_singular(blocks))
> +		return -EINVAL;
> +
> +	block = list_first_entry(blocks,
> +				 struct drm_buddy_block,
> +				 link);
> +
> +	if (!drm_buddy_block_is_allocated(block))
> +		return -EINVAL;
> +
> +	if (new_size > drm_buddy_block_size(mm, block))
> +		return -EINVAL;
> +
> +	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
> +		return -EINVAL;
> +
> +	if (new_size == drm_buddy_block_size(mm, block))
> +		return 0;
> +
> +	list_del(&block->link);
> +
> +	new_start = drm_buddy_block_offset(block);
> +	new_end = new_start + new_size - 1;
> +
> +	mark_free(mm, block);
> +
> +	list_add(&block->tmp_link, &dfs);
> +
> +	do {
> +		u64 block_start;
> +		u64 block_end;
> +
> +		block = list_first_entry_or_null(&dfs,
> +						 struct drm_buddy_block,
> +						 tmp_link);
> +		if (!block)
> +			break;
> +
> +		list_del(&block->tmp_link);
> +
> +		if (count == new_size)
> +			return 0;
> +
> +		block_start = drm_buddy_block_offset(block);
> +		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
> +
> +		if (!overlaps(new_start, new_end, block_start, block_end))
> +			continue;
> +
> +		if (contains(new_start, new_end, block_start, block_end)) {
> +			BUG_ON(!drm_buddy_block_is_free(block));
> +
> +			/* Allocate only required blocks */
> +			mark_allocated(block);
> +			mm->avail -= drm_buddy_block_size(mm, block);
> +			list_add_tail(&block->link, blocks);
> +			count += drm_buddy_block_size(mm, block);
> +			continue;
> +		}
> +
> +		if (!drm_buddy_block_is_split(block)) {

Should always be true, right? But I guess depends if we want to re-use 
this for generic range allocation...

> +			err = split_block(mm, block);
> +			if (unlikely(err))
> +				goto err_undo;
> +		}
> +
> +		list_add(&block->right->tmp_link, &dfs);
> +		list_add(&block->left->tmp_link, &dfs);
> +	} while (1);
> +
> +	return -ENOSPC;
> +
> +err_undo:
> +	buddy = get_buddy(block);
> +	if (buddy &&
> +	    (drm_buddy_block_is_free(block) &&
> +	     drm_buddy_block_is_free(buddy)))
> +		__drm_buddy_free(mm, block);
> +	return err;

Looking at the split_block failure path. The user allocated some block, 
and then tried to trim it, but this then marks it as free and removes it 
from their original list(potentially also freeing it), if we fail here. 
Would it be better to leave that decision to the user? If the trim() 
fails, worse case we get some internal fragmentation, and the user might 
be totally fine with that? I guess we could leave as-is, but for sure 
needs some big comment somewhere.

> +}
> +EXPORT_SYMBOL(drm_buddy_block_trim);
> +
>   static struct drm_buddy_block *
>   alloc_range(struct drm_buddy_mm *mm,
>   	    u64 start, u64 end,
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index b6ed5b37cf18..5e1f8c443058 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -97,6 +97,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	if (unlikely(err))
>   		goto err_free_blocks;
>   
> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +		mutex_lock(&bman->lock);
> +		err = drm_buddy_block_trim(mm,
> +				(u64)n_pages << PAGE_SHIFT,
> +				&bman_res->blocks);
> +		mutex_unlock(&bman->lock);
> +		if (unlikely(err))
> +			goto err_free_blocks;

Yeah, so maybe we could in theory treat this as best effort? But I guess 
unlikely to ever hit this anyway, so meh.

> +	}
> +
>   	*res = &bman_res->base;
>   	return 0;
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index cbe5e648440a..36e8f548acf7 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -145,6 +145,10 @@ int drm_buddy_alloc(struct drm_buddy_mm *mm,
>   		    struct list_head *blocks,
>   		    unsigned long flags);
>   
> +int drm_buddy_block_trim(struct drm_buddy_mm *mm,
> +			 u64 new_size,
> +			 struct list_head *blocks);
> +
>   void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
>   
>   void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);
>
Paneer Selvam, Arunpravin Nov. 23, 2021, 10:40 p.m. UTC | #2
On 18/11/21 12:32 am, Matthew Auld wrote:
> On 16/11/2021 20:18, Arunpravin wrote:
>> On contiguous allocation, we round up the size
>> to the *next* power of 2, implement a function
>> to free the unused pages after the newly allocate block.
>>
>> v2(Matthew Auld):
>>    - replace function name 'drm_buddy_free_unused_pages' with
>>      drm_buddy_block_trim
>>    - replace input argument name 'actual_size' with 'new_size'
>>    - add more validation checks for input arguments
>>    - add overlaps check to avoid needless searching and splitting
>>    - merged the below patch to see the feature in action
>>      - add free unused pages support to i915 driver
>>    - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
>>      are all globally visible
>>
>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c                   | 108 ++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  10 ++
>>   include/drm/drm_buddy.h                       |   4 +
>>   3 files changed, 122 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 0a9db2981188..943fe2ad27bf 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -284,6 +284,114 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>>   	return s1 <= s2 && e1 >= e2;
>>   }
>>   
>> +/**
>> + * drm_buddy_block_trim - free unused pages
>> + *
>> + * @mm: DRM buddy manager
>> + * @new_size: original size requested
>> + * @blocks: output list head to add allocated blocks
>> + *
>> + * For contiguous allocation, we round up the size to the nearest
>> + * power of two value, drivers consume *actual* size, so remaining
>> + * portions are unused and it can be freed.
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +int drm_buddy_block_trim(struct drm_buddy_mm *mm,
>> +			 u64 new_size,
>> +			 struct list_head *blocks)
>> +{
>> +	struct drm_buddy_block *block;
>> +	struct drm_buddy_block *buddy;
>> +	u64 new_start;
>> +	u64 new_end;
>> +	LIST_HEAD(dfs);
>> +	u64 count = 0;
>> +	int err;
>> +
>> +	if (!list_is_singular(blocks))
>> +		return -EINVAL;
>> +
>> +	block = list_first_entry(blocks,
>> +				 struct drm_buddy_block,
>> +				 link);
>> +
>> +	if (!drm_buddy_block_is_allocated(block))
>> +		return -EINVAL;
>> +
>> +	if (new_size > drm_buddy_block_size(mm, block))
>> +		return -EINVAL;
>> +
>> +	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
>> +		return -EINVAL;
>> +
>> +	if (new_size == drm_buddy_block_size(mm, block))
>> +		return 0;
>> +
>> +	list_del(&block->link);
>> +
>> +	new_start = drm_buddy_block_offset(block);
>> +	new_end = new_start + new_size - 1;
>> +
>> +	mark_free(mm, block);
>> +
>> +	list_add(&block->tmp_link, &dfs);
>> +
>> +	do {
>> +		u64 block_start;
>> +		u64 block_end;
>> +
>> +		block = list_first_entry_or_null(&dfs,
>> +						 struct drm_buddy_block,
>> +						 tmp_link);
>> +		if (!block)
>> +			break;
>> +
>> +		list_del(&block->tmp_link);
>> +
>> +		if (count == new_size)
>> +			return 0;
>> +
>> +		block_start = drm_buddy_block_offset(block);
>> +		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>> +
>> +		if (!overlaps(new_start, new_end, block_start, block_end))
>> +			continue;
>> +
>> +		if (contains(new_start, new_end, block_start, block_end)) {
>> +			BUG_ON(!drm_buddy_block_is_free(block));
>> +
>> +			/* Allocate only required blocks */
>> +			mark_allocated(block);
>> +			mm->avail -= drm_buddy_block_size(mm, block);
>> +			list_add_tail(&block->link, blocks);
>> +			count += drm_buddy_block_size(mm, block);
>> +			continue;
>> +		}
>> +
>> +		if (!drm_buddy_block_is_split(block)) {
> 
> Should always be true, right? But I guess depends if we want to re-use 
> this for generic range allocation...
yes, since we re-use this for generic range allocation I think we can
keep this check
> 
>> +			err = split_block(mm, block);
>> +			if (unlikely(err))
>> +				goto err_undo;
>> +		}
>> +
>> +		list_add(&block->right->tmp_link, &dfs);
>> +		list_add(&block->left->tmp_link, &dfs);
>> +	} while (1);
>> +
>> +	return -ENOSPC;
>> +
>> +err_undo:
>> +	buddy = get_buddy(block);
>> +	if (buddy &&
>> +	    (drm_buddy_block_is_free(block) &&
>> +	     drm_buddy_block_is_free(buddy)))
>> +		__drm_buddy_free(mm, block);
>> +	return err;
> 
> Looking at the split_block failure path. The user allocated some block, 
> and then tried to trim it, but this then marks it as free and removes it 
> from their original list(potentially also freeing it), if we fail here. 
> Would it be better to leave that decision to the user? If the trim() 
> fails, worse case we get some internal fragmentation, and the user might 
> be totally fine with that? I guess we could leave as-is, but for sure 
> needs some big comment somewhere.

Agreed

would it make sense to add a bool type input argument, so that we can
skip the split_block failure path in case of block_trim().

__alloc_range(mm, &dfs, start, size, ... bool trim)

err = split_block(mm, block);
if (unlikely(err)) {
	/* I think we can add big comment here for trim case */
	if (trim)
		return err;

goto err_undo;
}

> 
>> +}
>> +EXPORT_SYMBOL(drm_buddy_block_trim);
>> +
>>   static struct drm_buddy_block *
>>   alloc_range(struct drm_buddy_mm *mm,
>>   	    u64 start, u64 end,
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index b6ed5b37cf18..5e1f8c443058 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -97,6 +97,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	if (unlikely(err))
>>   		goto err_free_blocks;
>>   
>> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +		mutex_lock(&bman->lock);
>> +		err = drm_buddy_block_trim(mm,
>> +				(u64)n_pages << PAGE_SHIFT,
>> +				&bman_res->blocks);
>> +		mutex_unlock(&bman->lock);
>> +		if (unlikely(err))
>> +			goto err_free_blocks;
> 
> Yeah, so maybe we could in theory treat this as best effort? But I guess 
> unlikely to ever hit this anyway, so meh.
so I understood that we remove the below 2 lines
if (unlikely(err))
	goto err_free_blocks;

would it make sense to print a WARN msg
WARN(err < 0, "Trim failed returning %d for ttm_resource(%llu)\n",
     err, &bman_res->base);
> 
>> +	}
>> +
>>   	*res = &bman_res->base;
>>   	return 0;
>>   
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index cbe5e648440a..36e8f548acf7 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -145,6 +145,10 @@ int drm_buddy_alloc(struct drm_buddy_mm *mm,
>>   		    struct list_head *blocks,
>>   		    unsigned long flags);
>>   
>> +int drm_buddy_block_trim(struct drm_buddy_mm *mm,
>> +			 u64 new_size,
>> +			 struct list_head *blocks);
>> +
>>   void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
>>   
>>   void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 0a9db2981188..943fe2ad27bf 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -284,6 +284,114 @@  static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
 	return s1 <= s2 && e1 >= e2;
 }
 
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @new_size: original size requested
+ * @blocks: output list head to add allocated blocks
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power of two value, drivers consume *actual* size, so remaining
+ * portions are unused and it can be freed.
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_block_trim(struct drm_buddy_mm *mm,
+			 u64 new_size,
+			 struct list_head *blocks)
+{
+	struct drm_buddy_block *block;
+	struct drm_buddy_block *buddy;
+	u64 new_start;
+	u64 new_end;
+	LIST_HEAD(dfs);
+	u64 count = 0;
+	int err;
+
+	if (!list_is_singular(blocks))
+		return -EINVAL;
+
+	block = list_first_entry(blocks,
+				 struct drm_buddy_block,
+				 link);
+
+	if (!drm_buddy_block_is_allocated(block))
+		return -EINVAL;
+
+	if (new_size > drm_buddy_block_size(mm, block))
+		return -EINVAL;
+
+	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
+		return -EINVAL;
+
+	if (new_size == drm_buddy_block_size(mm, block))
+		return 0;
+
+	list_del(&block->link);
+
+	new_start = drm_buddy_block_offset(block);
+	new_end = new_start + new_size - 1;
+
+	mark_free(mm, block);
+
+	list_add(&block->tmp_link, &dfs);
+
+	do {
+		u64 block_start;
+		u64 block_end;
+
+		block = list_first_entry_or_null(&dfs,
+						 struct drm_buddy_block,
+						 tmp_link);
+		if (!block)
+			break;
+
+		list_del(&block->tmp_link);
+
+		if (count == new_size)
+			return 0;
+
+		block_start = drm_buddy_block_offset(block);
+		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+		if (!overlaps(new_start, new_end, block_start, block_end))
+			continue;
+
+		if (contains(new_start, new_end, block_start, block_end)) {
+			BUG_ON(!drm_buddy_block_is_free(block));
+
+			/* Allocate only required blocks */
+			mark_allocated(block);
+			mm->avail -= drm_buddy_block_size(mm, block);
+			list_add_tail(&block->link, blocks);
+			count += drm_buddy_block_size(mm, block);
+			continue;
+		}
+
+		if (!drm_buddy_block_is_split(block)) {
+			err = split_block(mm, block);
+			if (unlikely(err))
+				goto err_undo;
+		}
+
+		list_add(&block->right->tmp_link, &dfs);
+		list_add(&block->left->tmp_link, &dfs);
+	} while (1);
+
+	return -ENOSPC;
+
+err_undo:
+	buddy = get_buddy(block);
+	if (buddy &&
+	    (drm_buddy_block_is_free(block) &&
+	     drm_buddy_block_is_free(buddy)))
+		__drm_buddy_free(mm, block);
+	return err;
+}
+EXPORT_SYMBOL(drm_buddy_block_trim);
+
 static struct drm_buddy_block *
 alloc_range(struct drm_buddy_mm *mm,
 	    u64 start, u64 end,
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index b6ed5b37cf18..5e1f8c443058 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -97,6 +97,16 @@  static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	if (unlikely(err))
 		goto err_free_blocks;
 
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		mutex_lock(&bman->lock);
+		err = drm_buddy_block_trim(mm,
+				(u64)n_pages << PAGE_SHIFT,
+				&bman_res->blocks);
+		mutex_unlock(&bman->lock);
+		if (unlikely(err))
+			goto err_free_blocks;
+	}
+
 	*res = &bman_res->base;
 	return 0;
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index cbe5e648440a..36e8f548acf7 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -145,6 +145,10 @@  int drm_buddy_alloc(struct drm_buddy_mm *mm,
 		    struct list_head *blocks,
 		    unsigned long flags);
 
+int drm_buddy_block_trim(struct drm_buddy_mm *mm,
+			 u64 new_size,
+			 struct list_head *blocks);
+
 void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
 
 void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);