diff mbox series

[v6,4/6] drm: implement a method to free unused pages

Message ID 20211226222425.544863-4-Arunpravin.PaneerSelvam@amd.com (mailing list archive)
State New, archived
Headers show
Series [v6,1/6] drm: move the buddy allocator from i915 into common drm | expand

Commit Message

Paneer Selvam, Arunpravin Dec. 26, 2021, 10:24 p.m. UTC
On contiguous allocation, we round up the size
to the *next* power of 2, implement a function
to free the unused pages after the newly allocate block.

v2(Matthew Auld):
  - replace function name 'drm_buddy_free_unused_pages' with
    drm_buddy_block_trim
  - replace input argument name 'actual_size' with 'new_size'
  - add more validation checks for input arguments
  - add overlaps check to avoid needless searching and splitting
  - merged the below patch to see the feature in action
     - add free unused pages support to i915 driver
  - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
    are all globally visible

v3(Matthew Auld):
  - remove trim method error handling as we address the failure case
    at drm_buddy_block_trim() function

v4:
  - in case of trim, at __alloc_range() split_block failure path
    marks the block as free and removes it from the original list,
    potentially also freeing it, to overcome this problem, we turn
    the drm_buddy_block_trim() input node into a temporary node to
    prevent recursively freeing itself, but still retain the
    un-splitting/freeing of the other nodes(Matthew Auld)

  - modify the drm_buddy_block_trim() function return type

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/drm_buddy.c                   | 61 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  8 +++
 include/drm/drm_buddy.h                       |  4 ++
 3 files changed, 73 insertions(+)

Comments

Matthew Auld Jan. 4, 2022, 2:11 p.m. UTC | #1
On 26/12/2021 22:24, Arunpravin wrote:
> On contiguous allocation, we round up the size
> to the *next* power of 2, implement a function
> to free the unused pages after the newly allocate block.
> 
> v2(Matthew Auld):
>    - replace function name 'drm_buddy_free_unused_pages' with
>      drm_buddy_block_trim
>    - replace input argument name 'actual_size' with 'new_size'
>    - add more validation checks for input arguments
>    - add overlaps check to avoid needless searching and splitting
>    - merged the below patch to see the feature in action
>       - add free unused pages support to i915 driver
>    - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
>      are all globally visible
> 
> v3(Matthew Auld):
>    - remove trim method error handling as we address the failure case
>      at drm_buddy_block_trim() function
> 
> v4:
>    - in case of trim, at __alloc_range() split_block failure path
>      marks the block as free and removes it from the original list,
>      potentially also freeing it, to overcome this problem, we turn
>      the drm_buddy_block_trim() input node into a temporary node to
>      prevent recursively freeing itself, but still retain the
>      un-splitting/freeing of the other nodes(Matthew Auld)
> 
>    - modify the drm_buddy_block_trim() function return type
> 
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c                   | 61 +++++++++++++++++++
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  8 +++
>   include/drm/drm_buddy.h                       |  4 ++
>   3 files changed, 73 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index eddc1eeda02e..855afcaf7edd 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -538,6 +538,67 @@ static int __drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>   	return __alloc_range(mm, &dfs, start, size, blocks);
>   }
>   
> +/**
> + * drm_buddy_block_trim - free unused pages
> + *
> + * @mm: DRM buddy manager
> + * @new_size: original size requested
> + * @blocks: output list head to add allocated blocks
> + *
> + * For contiguous allocation, we round up the size to the nearest
> + * power of two value, drivers consume *actual* size, so remaining
> + * portions are unused and it can be freed.
> + */
> +void drm_buddy_block_trim(struct drm_buddy_mm *mm,
> +			  u64 new_size,
> +			  struct list_head *blocks)

It might be better to just return the error, and let the user decide if 
they want to ignore it? Also we might want some kind of unit test for 
this, so having an actual return value might be useful there.

> +{
> +	struct drm_buddy_block *parent;
> +	struct drm_buddy_block *block;
> +	LIST_HEAD(dfs);
> +	u64 new_start;
> +	int err;
> +
> +	if (!list_is_singular(blocks))
> +		return;
> +
> +	block = list_first_entry(blocks,
> +				 struct drm_buddy_block,
> +				 link);
> +
> +	if (!drm_buddy_block_is_allocated(block))
> +		return;
> +
> +	if (new_size > drm_buddy_block_size(mm, block))
> +		return;
> +
> +	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
> +		return;
> +
> +	if (new_size == drm_buddy_block_size(mm, block))
> +		return;
> +
> +	list_del(&block->link);
> +	mark_free(mm, block);
> +	mm->avail += drm_buddy_block_size(mm, block);
> +
> +	/* Prevent recursively freeing this node */
> +	parent = block->parent;
> +	block->parent = NULL;
> +
> +	new_start = drm_buddy_block_offset(block);
> +	list_add(&block->tmp_link, &dfs);
> +	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks);
> +	if (err) {
> +		mark_allocated(block);
> +		mm->avail -= drm_buddy_block_size(mm, block);
> +		list_add(&block->link, blocks);
> +	}
> +
> +	block->parent = parent;
> +}
> +EXPORT_SYMBOL(drm_buddy_block_trim);
> +
>   /**
>    * drm_buddy_alloc - allocate power-of-two blocks
>    *
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 7c58efb60dba..05f924f32e96 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -97,6 +97,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	if (unlikely(err))
>   		goto err_free_blocks;
>   
> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +		mutex_lock(&bman->lock);
> +		drm_buddy_block_trim(mm,
> +				(u64)n_pages << PAGE_SHIFT,

AFAIK, n_pages has already been rounded up to the next power-of-two 
here, so this becomes a noop. I assume we need to use the "original" 
size here?

> +				&bman_res->blocks);
> +		mutex_unlock(&bman->lock);
> +	}
> +
>   	*res = &bman_res->base;
>   	return 0;
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index f573b02304f4..703866a87939 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -146,6 +146,10 @@ int drm_buddy_alloc(struct drm_buddy_mm *mm,
>   		    struct list_head *blocks,
>   		    unsigned long flags);
>   
> +void drm_buddy_block_trim(struct drm_buddy_mm *mm,
> +			  u64 new_size,
> +			  struct list_head *blocks);
> +
>   void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
>   
>   void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);
>
Paneer Selvam, Arunpravin Jan. 6, 2022, 5:51 p.m. UTC | #2
On 04/01/22 7:41 pm, Matthew Auld wrote:
> On 26/12/2021 22:24, Arunpravin wrote:
>> On contiguous allocation, we round up the size
>> to the *next* power of 2, implement a function
>> to free the unused pages after the newly allocate block.
>>
>> v2(Matthew Auld):
>>    - replace function name 'drm_buddy_free_unused_pages' with
>>      drm_buddy_block_trim
>>    - replace input argument name 'actual_size' with 'new_size'
>>    - add more validation checks for input arguments
>>    - add overlaps check to avoid needless searching and splitting
>>    - merged the below patch to see the feature in action
>>       - add free unused pages support to i915 driver
>>    - lock drm_buddy_block_trim() function as it calls mark_free/mark_split
>>      are all globally visible
>>
>> v3(Matthew Auld):
>>    - remove trim method error handling as we address the failure case
>>      at drm_buddy_block_trim() function
>>
>> v4:
>>    - in case of trim, at __alloc_range() split_block failure path
>>      marks the block as free and removes it from the original list,
>>      potentially also freeing it, to overcome this problem, we turn
>>      the drm_buddy_block_trim() input node into a temporary node to
>>      prevent recursively freeing itself, but still retain the
>>      un-splitting/freeing of the other nodes(Matthew Auld)
>>
>>    - modify the drm_buddy_block_trim() function return type
>>
>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c                   | 61 +++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  8 +++
>>   include/drm/drm_buddy.h                       |  4 ++
>>   3 files changed, 73 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index eddc1eeda02e..855afcaf7edd 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -538,6 +538,67 @@ static int __drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>>   	return __alloc_range(mm, &dfs, start, size, blocks);
>>   }
>>   
>> +/**
>> + * drm_buddy_block_trim - free unused pages
>> + *
>> + * @mm: DRM buddy manager
>> + * @new_size: original size requested
>> + * @blocks: output list head to add allocated blocks
>> + *
>> + * For contiguous allocation, we round up the size to the nearest
>> + * power of two value, drivers consume *actual* size, so remaining
>> + * portions are unused and it can be freed.
>> + */
>> +void drm_buddy_block_trim(struct drm_buddy_mm *mm,
>> +			  u64 new_size,
>> +			  struct list_head *blocks)
> 
> It might be better to just return the error, and let the user decide if 
> they want to ignore it? Also we might want some kind of unit test for 
> this, so having an actual return value might be useful there.
> 
sure will revert it
>> +{
>> +	struct drm_buddy_block *parent;
>> +	struct drm_buddy_block *block;
>> +	LIST_HEAD(dfs);
>> +	u64 new_start;
>> +	int err;
>> +
>> +	if (!list_is_singular(blocks))
>> +		return;
>> +
>> +	block = list_first_entry(blocks,
>> +				 struct drm_buddy_block,
>> +				 link);
>> +
>> +	if (!drm_buddy_block_is_allocated(block))
>> +		return;
>> +
>> +	if (new_size > drm_buddy_block_size(mm, block))
>> +		return;
>> +
>> +	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
>> +		return;
>> +
>> +	if (new_size == drm_buddy_block_size(mm, block))
>> +		return;
>> +
>> +	list_del(&block->link);
>> +	mark_free(mm, block);
>> +	mm->avail += drm_buddy_block_size(mm, block);
>> +
>> +	/* Prevent recursively freeing this node */
>> +	parent = block->parent;
>> +	block->parent = NULL;
>> +
>> +	new_start = drm_buddy_block_offset(block);
>> +	list_add(&block->tmp_link, &dfs);
>> +	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks);
>> +	if (err) {
>> +		mark_allocated(block);
>> +		mm->avail -= drm_buddy_block_size(mm, block);
>> +		list_add(&block->link, blocks);
>> +	}
>> +
>> +	block->parent = parent;
>> +}
>> +EXPORT_SYMBOL(drm_buddy_block_trim);
>> +
>>   /**
>>    * drm_buddy_alloc - allocate power-of-two blocks
>>    *
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index 7c58efb60dba..05f924f32e96 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -97,6 +97,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	if (unlikely(err))
>>   		goto err_free_blocks;
>>   
>> +	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +		mutex_lock(&bman->lock);
>> +		drm_buddy_block_trim(mm,
>> +				(u64)n_pages << PAGE_SHIFT,
> 
> AFAIK, n_pages has already been rounded up to the next power-of-two 
> here, so this becomes a noop. I assume we need to use the "original" 
> size here?
> 
ah yes..missed it, will change as below
		u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT;

                mutex_lock(&bman->lock);
                drm_buddy_block_trim(mm,
                                original_size,
                                &bman_res->blocks);


>> +				&bman_res->blocks);
>> +		mutex_unlock(&bman->lock);
>> +	}
>> +
>>   	*res = &bman_res->base;
>>   	return 0;
>>   
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index f573b02304f4..703866a87939 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -146,6 +146,10 @@ int drm_buddy_alloc(struct drm_buddy_mm *mm,
>>   		    struct list_head *blocks,
>>   		    unsigned long flags);
>>   
>> +void drm_buddy_block_trim(struct drm_buddy_mm *mm,
>> +			  u64 new_size,
>> +			  struct list_head *blocks);
>> +
>>   void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
>>   
>>   void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index eddc1eeda02e..855afcaf7edd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -538,6 +538,67 @@  static int __drm_buddy_alloc_range(struct drm_buddy_mm *mm,
 	return __alloc_range(mm, &dfs, start, size, blocks);
 }
 
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @new_size: original size requested
+ * @blocks: output list head to add allocated blocks
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power of two value, drivers consume *actual* size, so remaining
+ * portions are unused and it can be freed.
+ */
+void drm_buddy_block_trim(struct drm_buddy_mm *mm,
+			  u64 new_size,
+			  struct list_head *blocks)
+{
+	struct drm_buddy_block *parent;
+	struct drm_buddy_block *block;
+	LIST_HEAD(dfs);
+	u64 new_start;
+	int err;
+
+	if (!list_is_singular(blocks))
+		return;
+
+	block = list_first_entry(blocks,
+				 struct drm_buddy_block,
+				 link);
+
+	if (!drm_buddy_block_is_allocated(block))
+		return;
+
+	if (new_size > drm_buddy_block_size(mm, block))
+		return;
+
+	if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
+		return;
+
+	if (new_size == drm_buddy_block_size(mm, block))
+		return;
+
+	list_del(&block->link);
+	mark_free(mm, block);
+	mm->avail += drm_buddy_block_size(mm, block);
+
+	/* Prevent recursively freeing this node */
+	parent = block->parent;
+	block->parent = NULL;
+
+	new_start = drm_buddy_block_offset(block);
+	list_add(&block->tmp_link, &dfs);
+	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks);
+	if (err) {
+		mark_allocated(block);
+		mm->avail -= drm_buddy_block_size(mm, block);
+		list_add(&block->link, blocks);
+	}
+
+	block->parent = parent;
+}
+EXPORT_SYMBOL(drm_buddy_block_trim);
+
 /**
  * drm_buddy_alloc - allocate power-of-two blocks
  *
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 7c58efb60dba..05f924f32e96 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -97,6 +97,14 @@  static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	if (unlikely(err))
 		goto err_free_blocks;
 
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		mutex_lock(&bman->lock);
+		drm_buddy_block_trim(mm,
+				(u64)n_pages << PAGE_SHIFT,
+				&bman_res->blocks);
+		mutex_unlock(&bman->lock);
+	}
+
 	*res = &bman_res->base;
 	return 0;
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index f573b02304f4..703866a87939 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -146,6 +146,10 @@  int drm_buddy_alloc(struct drm_buddy_mm *mm,
 		    struct list_head *blocks,
 		    unsigned long flags);
 
+void drm_buddy_block_trim(struct drm_buddy_mm *mm,
+			  u64 new_size,
+			  struct list_head *blocks);
+
 void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block);
 
 void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects);