diff mbox series

[v6] drm: Optimise for continuous memory allocation

Message ID 20221218065708.93332-1-xinhui.pan@amd.com (mailing list archive)
State New, archived
Headers show
Series [v6] drm: Optimise for continuous memory allocation | expand

Commit Message

Pan, Xinhui Dec. 18, 2022, 6:57 a.m. UTC
Optimise a little when continuous memory request fails.

There are memory holes and continuous memory request usually fails when
order is too big.
Currently buddy only look for exactly order memory for such request.
Now we can try again to look for several smaller continuous memory on
failure.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v5:
reworked
---
 drivers/gpu/drm/drm_buddy.c | 161 ++++++++++++++++++++++++++++++++++--
 1 file changed, 154 insertions(+), 7 deletions(-)

Comments

Christian König Dec. 19, 2022, 8:37 a.m. UTC | #1
Am 18.12.22 um 07:57 schrieb xinhui pan:
> Optimise a little when continuous memory request fails.
>
> There are memory holes and continuous memory request usually fails when
> order is too big.
> Currently buddy only look for exactly order memory for such request.
> Now we can try again to look for several smaller continuous memory on
> failure.

I'm still pretty sure that this is illegal.

See the order is not only the minimum we need for linear allocation, but 
also the minimum alignment we need.

So if you look at some block combination like 010 when searching for an 
order 2 allocation you satisfy the contiguous constrain, but not the 
alignment constrain and that's illegal.

Additional to that we have a huge additional CPU overhead for contiguous 
allocations with that.

Regards,
Christian.

>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v5:
> reworked
> ---
>   drivers/gpu/drm/drm_buddy.c | 161 ++++++++++++++++++++++++++++++++++--
>   1 file changed, 154 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..6c795e1b3247 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -386,6 +386,140 @@ alloc_range_bias(struct drm_buddy *mm,
>   	return ERR_PTR(err);
>   }
>   
> +static void __continuous_block_in_tree(struct drm_buddy_block *top_block,
> +				       struct list_head *fbl,
> +				       int left,
> +				       int min_order)
> +{
> +	/*
> +	 * Look for continuous memory of
> +	 * [top_block) when left is true or (top_block] when left is false.
> +	 * The list of fbl looks like (top_block1][free_block][...][top_blockX).
> +	 * Memory offset is in ascending order.
> +	 */
> +	while (top_block) {
> +		struct drm_buddy_block *block = top_block;
> +		int order;
> +
> +		while (drm_buddy_block_is_split(block))
> +			block = left ? block->left : block->right;
> +
> +		order = drm_buddy_block_order(block);
> +		if (order < min_order || !drm_buddy_block_is_free(block))
> +			return;
> +
> +		if (left)
> +			list_add_tail(&block->tmp_link, fbl);
> +		else
> +			list_add(&block->tmp_link, fbl);
> +
> +		if (order == min_order)
> +			return;
> +		top_block = __get_buddy(block);
> +	}
> +}
> +
> +static bool __free_block_in_order(struct list_head *fbl,
> +				  struct drm_buddy_block *cur,
> +				  int order,
> +				  struct drm_buddy_block **first,
> +				  struct drm_buddy_block **last)
> +{
> +	struct drm_buddy_block *fb = cur, *lb = list_next_entry(cur, tmp_link);
> +	u64 pages = BIT(order);
> +	u64 cur_pages = 0;
> +
> +	/*
> +	 * Look for continuous memory which satisfy requested order.
> +	 * Memory in list fbl are already in below order.
> +	 * 1) Memory offset are in ascending order.
> +	 * 2) Memory size are in ascending order from left to middle and
> +	 * descending order from middle to right.
> +	 * So walk through the list of fbl from middle to both sides to
> +	 * choose the bigger memory.
> +	 * This is because one memory with order X are composed with 2 of order X-1
> +	 * or 1 of order X-1 and 2 of order X-2, etc. Looks like below.
> +	 *      n
> +	 *    {∑(X - y)} + {2 * (X-n-1))}
> +	 *      1
> +	 * And the last 2 memory of order (X-n-1) are at the two sides of list.
> +	 */
> +	list_for_each_entry_from_reverse(fb, fbl, tmp_link) {
> +		int prev_order = drm_buddy_block_order(fb);
> +
> +		list_for_each_entry_from(lb, fbl, tmp_link) {
> +			int next_order = drm_buddy_block_order(lb);
> +
> +			if (prev_order <= next_order)
> +				cur_pages += BIT(next_order);
> +			else
> +				break;
> +		}
> +
> +		cur_pages += BIT(prev_order);
> +		if (pages == cur_pages) {
> +			*first = fb;
> +			*last = list_prev_entry(lb, tmp_link);
> +			return true;
> +		}
> +		BUG_ON(pages < cur_pages);
> +	}
> +
> +	*first = *last = NULL;
> +	return false;
> +}
> +
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> +		       int order,
> +		       unsigned long flags,
> +		       struct drm_buddy_block **lb)
> +{
> +	struct list_head *head = &mm->free_list[order - 1];
> +	struct drm_buddy_block *free_block, *first = NULL, *last = NULL;
> +
> +	/*
> +	 * Look for continuous free memory in buddy and buddy-in-law.
> +	 * IOW, the most left blocks at right of free block and the most right
> +	 * blocks at left of free block.
> +	 */
> +
> +	list_for_each_entry(free_block, head, link) {
> +		struct drm_buddy_block *buddy, *parent, *block;
> +		int left, min_order = 0;
> +		LIST_HEAD(fbl);
> +
> +		parent = free_block->parent;
> +		if (!parent)
> +			continue;
> +
> +		left = parent->left == free_block;
> +		list_add(&free_block->tmp_link, &fbl);
> +		buddy = __get_buddy(free_block);
> +		__continuous_block_in_tree(buddy, &fbl, left, min_order);
> +
> +		while (parent && !((parent->left == block) ^ left)) {
> +			block = parent;
> +			parent = parent->parent;
> +		}
> +
> +		if (!parent)
> +			continue;
> +
> +		buddy = __get_buddy(block);
> +		__continuous_block_in_tree(buddy, &fbl, !left, min_order);
> +
> +		/* list head of fbl is invalid outside.
> +		 * Walk through list from first fo last only.
> +		 */
> +		if (__free_block_in_order(&fbl, free_block, order, &first, &last))
> +			break;
> +	}
> +
> +	*lb = last;
> +	return first;
> +}
> +
>   static struct drm_buddy_block *
>   get_maxblock(struct list_head *head)
>   {
> @@ -637,7 +771,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			   struct list_head *blocks,
>   			   unsigned long flags)
>   {
> -	struct drm_buddy_block *block = NULL;
> +	struct drm_buddy_block *block = NULL, *last_block = NULL;
>   	unsigned int min_order, order;
>   	unsigned long pages;
>   	LIST_HEAD(allocated);
> @@ -689,17 +823,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   				break;
>   
>   			if (order-- == min_order) {
> +				if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> +				    min_order != 0 && pages == BIT(min_order)) {
> +					block = find_continuous_blocks(mm,
> +								       min_order,
> +								       flags,
> +								       &last_block);
> +					if (block)
> +						break;
> +				}
>   				err = -ENOSPC;
>   				goto err_free;
>   			}
>   		} while (1);
>   
> -		mark_allocated(block);
> -		mm->avail -= drm_buddy_block_size(mm, block);
> -		kmemleak_update_trace(block);
> -		list_add_tail(&block->link, &allocated);
> -
> -		pages -= BIT(order);
> +		do {
> +			mark_allocated(block);
> +			mm->avail -= drm_buddy_block_size(mm, block);
> +			kmemleak_update_trace(block);
> +			list_add_tail(&block->link, &allocated);
> +			pages -= BIT(drm_buddy_block_order(block));
> +			if (block == last_block || !last_block)
> +				break;
> +			block = list_next_entry(block, tmp_link);
> +		} while (block);
>   
>   		if (!pages)
>   			break;
Dan Carpenter Dec. 22, 2022, 6:53 p.m. UTC | #2
Hi xinhui,

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/xinhui-pan/drm-Optimise-for-continuous-memory-allocation/20221218-145922
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20221218065708.93332-1-xinhui.pan%40amd.com
patch subject: [PATCH v6] drm: Optimise for continuous memory allocation
config: s390-randconfig-m041-20221218
compiler: s390-linux-gcc (GCC) 12.1.0

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <error27@gmail.com>

smatch warnings:
drivers/gpu/drm/drm_buddy.c:501 find_continuous_blocks() error: uninitialized symbol 'block'.

vim +/block +501 drivers/gpu/drm/drm_buddy.c

8a257b57bc11a2 xinhui pan 2022-12-18  472  static struct drm_buddy_block *
8a257b57bc11a2 xinhui pan 2022-12-18  473  find_continuous_blocks(struct drm_buddy *mm,
8a257b57bc11a2 xinhui pan 2022-12-18  474  		       int order,
8a257b57bc11a2 xinhui pan 2022-12-18  475  		       unsigned long flags,
8a257b57bc11a2 xinhui pan 2022-12-18  476  		       struct drm_buddy_block **lb)
8a257b57bc11a2 xinhui pan 2022-12-18  477  {
8a257b57bc11a2 xinhui pan 2022-12-18  478  	struct list_head *head = &mm->free_list[order - 1];
8a257b57bc11a2 xinhui pan 2022-12-18  479  	struct drm_buddy_block *free_block, *first = NULL, *last = NULL;
8a257b57bc11a2 xinhui pan 2022-12-18  480  
8a257b57bc11a2 xinhui pan 2022-12-18  481  	/*
8a257b57bc11a2 xinhui pan 2022-12-18  482  	 * Look for continuous free memory in buddy and buddy-in-law.
8a257b57bc11a2 xinhui pan 2022-12-18  483  	 * IOW, the most left blocks at right of free block and the most right
8a257b57bc11a2 xinhui pan 2022-12-18  484  	 * blocks at left of free block.
8a257b57bc11a2 xinhui pan 2022-12-18  485  	 */
8a257b57bc11a2 xinhui pan 2022-12-18  486  
8a257b57bc11a2 xinhui pan 2022-12-18  487  	list_for_each_entry(free_block, head, link) {
8a257b57bc11a2 xinhui pan 2022-12-18  488  		struct drm_buddy_block *buddy, *parent, *block;
8a257b57bc11a2 xinhui pan 2022-12-18  489  		int left, min_order = 0;
8a257b57bc11a2 xinhui pan 2022-12-18  490  		LIST_HEAD(fbl);
8a257b57bc11a2 xinhui pan 2022-12-18  491  
8a257b57bc11a2 xinhui pan 2022-12-18  492  		parent = free_block->parent;
8a257b57bc11a2 xinhui pan 2022-12-18  493  		if (!parent)
8a257b57bc11a2 xinhui pan 2022-12-18  494  			continue;
8a257b57bc11a2 xinhui pan 2022-12-18  495  
8a257b57bc11a2 xinhui pan 2022-12-18  496  		left = parent->left == free_block;
8a257b57bc11a2 xinhui pan 2022-12-18  497  		list_add(&free_block->tmp_link, &fbl);
8a257b57bc11a2 xinhui pan 2022-12-18  498  		buddy = __get_buddy(free_block);
8a257b57bc11a2 xinhui pan 2022-12-18  499  		__continuous_block_in_tree(buddy, &fbl, left, min_order);
8a257b57bc11a2 xinhui pan 2022-12-18  500  
8a257b57bc11a2 xinhui pan 2022-12-18 @501  		while (parent && !((parent->left == block) ^ left)) {
                                                                                            ^^^^^
Not initialized on first iteration.

8a257b57bc11a2 xinhui pan 2022-12-18  502  			block = parent;
8a257b57bc11a2 xinhui pan 2022-12-18  503  			parent = parent->parent;
8a257b57bc11a2 xinhui pan 2022-12-18  504  		}
8a257b57bc11a2 xinhui pan 2022-12-18  505  
8a257b57bc11a2 xinhui pan 2022-12-18  506  		if (!parent)
8a257b57bc11a2 xinhui pan 2022-12-18  507  			continue;
8a257b57bc11a2 xinhui pan 2022-12-18  508  
8a257b57bc11a2 xinhui pan 2022-12-18  509  		buddy = __get_buddy(block);
8a257b57bc11a2 xinhui pan 2022-12-18  510  		__continuous_block_in_tree(buddy, &fbl, !left, min_order);
8a257b57bc11a2 xinhui pan 2022-12-18  511  
8a257b57bc11a2 xinhui pan 2022-12-18  512  		/* list head of fbl is invalid outside.
8a257b57bc11a2 xinhui pan 2022-12-18  513  		 * Walk through list from first fo last only.
8a257b57bc11a2 xinhui pan 2022-12-18  514  		 */
8a257b57bc11a2 xinhui pan 2022-12-18  515  		if (__free_block_in_order(&fbl, free_block, order, &first, &last))
8a257b57bc11a2 xinhui pan 2022-12-18  516  			break;
8a257b57bc11a2 xinhui pan 2022-12-18  517  	}
8a257b57bc11a2 xinhui pan 2022-12-18  518  
8a257b57bc11a2 xinhui pan 2022-12-18  519  	*lb = last;
8a257b57bc11a2 xinhui pan 2022-12-18  520  	return first;
8a257b57bc11a2 xinhui pan 2022-12-18  521  }
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..6c795e1b3247 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -386,6 +386,140 @@  alloc_range_bias(struct drm_buddy *mm,
 	return ERR_PTR(err);
 }
 
+static void __continuous_block_in_tree(struct drm_buddy_block *top_block,
+				       struct list_head *fbl,
+				       int left,
+				       int min_order)
+{
+	/*
+	 * Look for continuous memory of
+	 * [top_block) when left is true or (top_block] when left is false.
+	 * The list of fbl looks like (top_block1][free_block][...][top_blockX).
+	 * Memory offset is in ascending order.
+	 */
+	while (top_block) {
+		struct drm_buddy_block *block = top_block;
+		int order;
+
+		while (drm_buddy_block_is_split(block))
+			block = left ? block->left : block->right;
+
+		order = drm_buddy_block_order(block);
+		if (order < min_order || !drm_buddy_block_is_free(block))
+			return;
+
+		if (left)
+			list_add_tail(&block->tmp_link, fbl);
+		else
+			list_add(&block->tmp_link, fbl);
+
+		if (order == min_order)
+			return;
+		top_block = __get_buddy(block);
+	}
+}
+
+static bool __free_block_in_order(struct list_head *fbl,
+				  struct drm_buddy_block *cur,
+				  int order,
+				  struct drm_buddy_block **first,
+				  struct drm_buddy_block **last)
+{
+	struct drm_buddy_block *fb = cur, *lb = list_next_entry(cur, tmp_link);
+	u64 pages = BIT(order);
+	u64 cur_pages = 0;
+
+	/*
+	 * Look for continuous memory which satisfy requested order.
+	 * Memory in list fbl are already in below order.
+	 * 1) Memory offset are in ascending order.
+	 * 2) Memory size are in ascending order from left to middle and
+	 * descending order from middle to right.
+	 * So walk through the list of fbl from middle to both sides to
+	 * choose the bigger memory.
+	 * This is because one memory with order X are composed with 2 of order X-1
+	 * or 1 of order X-1 and 2 of order X-2, etc. Looks like below.
+	 *      n
+	 *    {∑(X - y)} + {2 * (X-n-1))}
+	 *      1
+	 * And the last 2 memory of order (X-n-1) are at the two sides of list.
+	 */
+	list_for_each_entry_from_reverse(fb, fbl, tmp_link) {
+		int prev_order = drm_buddy_block_order(fb);
+
+		list_for_each_entry_from(lb, fbl, tmp_link) {
+			int next_order = drm_buddy_block_order(lb);
+
+			if (prev_order <= next_order)
+				cur_pages += BIT(next_order);
+			else
+				break;
+		}
+
+		cur_pages += BIT(prev_order);
+		if (pages == cur_pages) {
+			*first = fb;
+			*last = list_prev_entry(lb, tmp_link);
+			return true;
+		}
+		BUG_ON(pages < cur_pages);
+	}
+
+	*first = *last = NULL;
+	return false;
+}
+
+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+		       int order,
+		       unsigned long flags,
+		       struct drm_buddy_block **lb)
+{
+	struct list_head *head = &mm->free_list[order - 1];
+	struct drm_buddy_block *free_block, *first = NULL, *last = NULL;
+
+	/*
+	 * Look for continuous free memory in buddy and buddy-in-law.
+	 * IOW, the most left blocks at right of free block and the most right
+	 * blocks at left of free block.
+	 */
+
+	list_for_each_entry(free_block, head, link) {
+		struct drm_buddy_block *buddy, *parent, *block;
+		int left, min_order = 0;
+		LIST_HEAD(fbl);
+
+		parent = free_block->parent;
+		if (!parent)
+			continue;
+
+		left = parent->left == free_block;
+		list_add(&free_block->tmp_link, &fbl);
+		buddy = __get_buddy(free_block);
+		__continuous_block_in_tree(buddy, &fbl, left, min_order);
+
+		while (parent && !((parent->left == block) ^ left)) {
+			block = parent;
+			parent = parent->parent;
+		}
+
+		if (!parent)
+			continue;
+
+		buddy = __get_buddy(block);
+		__continuous_block_in_tree(buddy, &fbl, !left, min_order);
+
+		/* list head of fbl is invalid outside.
+		 * Walk through list from first fo last only.
+		 */
+		if (__free_block_in_order(&fbl, free_block, order, &first, &last))
+			break;
+	}
+
+	*lb = last;
+	return first;
+}
+
 static struct drm_buddy_block *
 get_maxblock(struct list_head *head)
 {
@@ -637,7 +771,7 @@  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			   struct list_head *blocks,
 			   unsigned long flags)
 {
-	struct drm_buddy_block *block = NULL;
+	struct drm_buddy_block *block = NULL, *last_block = NULL;
 	unsigned int min_order, order;
 	unsigned long pages;
 	LIST_HEAD(allocated);
@@ -689,17 +823,30 @@  int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 				break;
 
 			if (order-- == min_order) {
+				if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+				    min_order != 0 && pages == BIT(min_order)) {
+					block = find_continuous_blocks(mm,
+								       min_order,
+								       flags,
+								       &last_block);
+					if (block)
+						break;
+				}
 				err = -ENOSPC;
 				goto err_free;
 			}
 		} while (1);
 
-		mark_allocated(block);
-		mm->avail -= drm_buddy_block_size(mm, block);
-		kmemleak_update_trace(block);
-		list_add_tail(&block->link, &allocated);
-
-		pages -= BIT(order);
+		do {
+			mark_allocated(block);
+			mm->avail -= drm_buddy_block_size(mm, block);
+			kmemleak_update_trace(block);
+			list_add_tail(&block->link, &allocated);
+			pages -= BIT(drm_buddy_block_order(block));
+			if (block == last_block || !last_block)
+				break;
+			block = list_next_entry(block, tmp_link);
+		} while (block);
 
 		if (!pages)
 			break;