drm/ttm: optimize pool allocations a bit

Message ID 20221107195808.1873-1-christian.koenig@amd.com (mailing list archive)
State New, archived
Series drm/ttm: optimize pool allocations a bit

Commit Message

Christian König Nov. 7, 2022, 7:58 p.m. UTC
If we get a page pool, use it as much as possible.

If we can't get more pages from the pool, allocate as many as possible.

Only if that still doesn't work, reduce the order and try again.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 81 ++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 24 deletions(-)
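
In outline, the reworked loop in ttm_pool_alloc() tries three things at each order before giving up. A condensed C sketch of the control flow follows (illustrative only; pool selection, caching transitions, and error unwinding are elided, and take_page() is a hypothetical stand-in for the new ttm_pool_page_allocated() helper):

	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
	     num_pages;
	     order = min_t(unsigned int, order, __fls(num_pages))) {
		/* 1) Drain the matching page pool as far as it goes. */
		while (num_pages >= (1 << order) &&
		       (p = ttm_pool_type_take(pt)))
			take_page(p, order);	/* consumes 1 << order pages */

		/* 2) Then allocate fresh pages at the current order. */
		while (num_pages >= (1 << order) &&
		       (p = ttm_pool_alloc_page(pool, gfp_flags, order)))
			take_page(p, order);

		/* 3) Only if nothing was available at this order, retry one
		 *    order lower; failing even at order 0 means -ENOMEM.
		 */
		if (!p) {
			if (order) {
				--order;
				continue;
			}
			return -ENOMEM;
		}
	}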

Comments

Luben Tuikov Nov. 7, 2022, 10:41 p.m. UTC | #1
On 2022-11-07 14:58, Christian König wrote:
> If we get a page pool, use it as much as possible.
> 
> If we can't get more pages from the pool, allocate as many as possible.
> 
> Only if that still doesn't work, reduce the order and try again.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 81 ++++++++++++++++++++++++----------
>  1 file changed, 57 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 21b61631f73a..cf15874cf380 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -344,6 +344,27 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
>  	return p->private;
>  }
>  
> +/* Called when we got a page, either from a pool or newly allocated */
> +int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
> +			    struct page *p, dma_addr_t **dma_addr,
> +			    unsigned long *num_pages, struct page ***pages)
> +{
> +	unsigned int i;
> +	int r;
> +
> +	if (*dma_addr) {
> +		r = ttm_pool_map(pool, order, p, dma_addr);
> +		if (r)
> +			return r;
> +	}
> +
> +	*num_pages -= 1 << order;
> +	for (i = 1 << order; i; --i)
> +		*((*pages)++) = p++;

Since we're using a for-loop here anyway, perhaps it's better to simplify
and clarify this as:
	for (i = 1 << order; i; i--, (*pages)++, p++)
		**pages = p;

Regards,
Luben

> +
> +	return 0;
> +}
> +
>  /**
>   * ttm_pool_alloc - Fill a ttm_tt object
>   *
> @@ -385,45 +406,57 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>  	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>  	     num_pages;
>  	     order = min_t(unsigned int, order, __fls(num_pages))) {
> -		bool apply_caching = false;
>  		struct ttm_pool_type *pt;
>  
>  		pt = ttm_pool_select_type(pool, tt->caching, order);
>  		p = pt ? ttm_pool_type_take(pt) : NULL;
>  		if (p) {
> -			apply_caching = true;
> -		} else {
> -			p = ttm_pool_alloc_page(pool, gfp_flags, order);
> -			if (p && PageHighMem(p))
> -				apply_caching = true;
> -		}
> -
> -		if (!p) {
> -			if (order) {
> -				--order;
> -				continue;
> -			}
> -			r = -ENOMEM;
> -			goto error_free_all;
> -		}
> -
> -		if (apply_caching) {
>  			r = ttm_pool_apply_caching(caching, pages,
>  						   tt->caching);
>  			if (r)
>  				goto error_free_page;
> -			caching = pages + (1 << order);
> +
> +			while (p) {
> +				r = ttm_pool_page_allocated(pool, order, p,
> +							    &dma_addr,
> +							    &num_pages,
> +							    &pages);
> +				if (r)
> +					goto error_free_page;
> +
> +				if (num_pages < (1 << order))
> +					break;
> +
> +				p = ttm_pool_type_take(pt);
> +			}
> +			caching = pages;
>  		}
>  
> -		if (dma_addr) {
> -			r = ttm_pool_map(pool, order, p, &dma_addr);
> +		while (num_pages >= (1 << order) &&
> +		       (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
> +
> +			if (PageHighMem(p)) {
> +				r = ttm_pool_apply_caching(caching, pages,
> +							   tt->caching);
> +				if (r)
> +					goto error_free_page;
> +			}
> +			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
> +						    &num_pages, &pages);
>  			if (r)
>  				goto error_free_page;
> +			if (PageHighMem(p))
> +				caching = pages;
>  		}
>  
> -		num_pages -= 1 << order;
> -		for (i = 1 << order; i; --i)
> -			*(pages++) = p++;
> +		if (!p) {
> +			if (order) {
> +				--order;
> +				continue;
> +			}
> +			r = -ENOMEM;
> +			goto error_free_all;
> +		}
>  	}
>  
>  	r = ttm_pool_apply_caching(caching, pages, tt->caching);
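
Luben's suggestion above is purely stylistic; both forms advance the same three cursors. For readers puzzled by the struct page ***pages indirection, here is a tiny self-contained userspace sketch of what *((*pages)++) = p++ does (struct page is stubbed out and copy_pages() is a hypothetical name; this is illustrative, not kernel code):

#include <stdio.h>

struct page { int id; };

/* Mimics the copy loop in ttm_pool_page_allocated(): append the
 * 1 << order pages starting at p to the caller's array, advancing
 * the caller's cursor through the extra level of indirection.
 */
static void copy_pages(struct page *p, unsigned int order,
		       struct page ***pages)
{
	unsigned int i;

	for (i = 1 << order; i; --i)
		*((*pages)++) = p++;	/* *pages is the caller's cursor */
}

int main(void)
{
	struct page backing[4] = { {0}, {1}, {2}, {3} };
	struct page *array[4];
	struct page **cursor = array;

	copy_pages(backing, 2, &cursor);	/* order 2 == 4 pages */

	/* cursor now points one past the last entry written */
	printf("stored %td pages, first id %d\n",
	       cursor - array, array[0]->id);
	return 0;
}
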
Felix Kuehling Nov. 8, 2022, 5:57 a.m. UTC | #2
Am 2022-11-07 um 14:58 schrieb Christian König:
> If we get a page pool, use it as much as possible.
>
> If we can't get more pages from the pool, allocate as many as possible.
>
> Only if that still doesn't work, reduce the order and try again.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_pool.c | 81 ++++++++++++++++++++++++----------
>   1 file changed, 57 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 21b61631f73a..cf15874cf380 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -344,6 +344,27 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
>   	return p->private;
>   }
>   
> +/* Called when we got a page, either from a pool or newly allocated */
> +int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,

This function should be static.


> +			    struct page *p, dma_addr_t **dma_addr,
> +			    unsigned long *num_pages, struct page ***pages)
> +{
> +	unsigned int i;
> +	int r;
> +
> +	if (*dma_addr) {
> +		r = ttm_pool_map(pool, order, p, dma_addr);
> +		if (r)
> +			return r;
> +	}
> +
> +	*num_pages -= 1 << order;
> +	for (i = 1 << order; i; --i)
> +		*((*pages)++) = p++;
> +
> +	return 0;
> +}
> +
>   /**
>    * ttm_pool_alloc - Fill a ttm_tt object
>    *
> @@ -385,45 +406,57 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>   	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>   	     num_pages;
>   	     order = min_t(unsigned int, order, __fls(num_pages))) {
> -		bool apply_caching = false;
>   		struct ttm_pool_type *pt;
>   
>   		pt = ttm_pool_select_type(pool, tt->caching, order);
>   		p = pt ? ttm_pool_type_take(pt) : NULL;
>   		if (p) {
> -			apply_caching = true;
> -		} else {
> -			p = ttm_pool_alloc_page(pool, gfp_flags, order);
> -			if (p && PageHighMem(p))
> -				apply_caching = true;
> -		}
> -
> -		if (!p) {
> -			if (order) {
> -				--order;
> -				continue;
> -			}
> -			r = -ENOMEM;
> -			goto error_free_all;
> -		}
> -
> -		if (apply_caching) {
>   			r = ttm_pool_apply_caching(caching, pages,
>   						   tt->caching);
>   			if (r)
>   				goto error_free_page;
> -			caching = pages + (1 << order);
> +
> +			while (p) {

This looks like it should be a do-while loop. If you get here, there 
will be at least one iteration.

With those two nit-picks fixed, this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
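
For reference, the do-while shape suggested here would look roughly like this (a sketch against the reviewed hunk, not the committed code):

			do {
				r = ttm_pool_page_allocated(pool, order, p,
							    &dma_addr,
							    &num_pages,
							    &pages);
				if (r)
					goto error_free_page;

				if (num_pages < (1 << order))
					break;

				p = ttm_pool_type_take(pt);
			} while (p);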


> +				r = ttm_pool_page_allocated(pool, order, p,
> +							    &dma_addr,
> +							    &num_pages,
> +							    &pages);
> +				if (r)
> +					goto error_free_page;
> +
> +				if (num_pages < (1 << order))
> +					break;
> +
> +				p = ttm_pool_type_take(pt);
> +			}
> +			caching = pages;
>   		}
>   
> -		if (dma_addr) {
> -			r = ttm_pool_map(pool, order, p, &dma_addr);
> +		while (num_pages >= (1 << order) &&
> +		       (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
> +
> +			if (PageHighMem(p)) {
> +				r = ttm_pool_apply_caching(caching, pages,
> +							   tt->caching);
> +				if (r)
> +					goto error_free_page;
> +			}
> +			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
> +						    &num_pages, &pages);
>   			if (r)
>   				goto error_free_page;
> +			if (PageHighMem(p))
> +				caching = pages;
>   		}
>   
> -		num_pages -= 1 << order;
> -		for (i = 1 << order; i; --i)
> -			*(pages++) = p++;
> +		if (!p) {
> +			if (order) {
> +				--order;
> +				continue;
> +			}
> +			r = -ENOMEM;
> +			goto error_free_all;
> +		}
>   	}
>   
>   	r = ttm_pool_apply_caching(caching, pages, tt->caching);
kernel test robot Nov. 9, 2022, 5:45 a.m. UTC | #3
Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm/drm-next drm-exynos/exynos-drm-next drm-intel/for-linux-next linus/master v6.1-rc4 next-20221108]
[cannot apply to drm-tip/drm-tip]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-ttm-optimize-pool-allocations-a-bit/20221108-040029
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20221107195808.1873-1-christian.koenig%40amd.com
patch subject: [PATCH] drm/ttm: optimize pool allocations a bit
config: s390-allyesconfig
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/ec55cfc840243dd3cd0d0a02ce9ae54b8b474c4d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-optimize-pool-allocations-a-bit/20221108-040029
        git checkout ec55cfc840243dd3cd0d0a02ce9ae54b8b474c4d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash drivers/gpu/drm/ttm/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_pool.c:348:5: warning: no previous prototype for 'ttm_pool_page_allocated' [-Wmissing-prototypes]
     348 | int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
         |     ^~~~~~~~~~~~~~~~~~~~~~~


vim +/ttm_pool_page_allocated +348 drivers/gpu/drm/ttm/ttm_pool.c

   346	
   347	/* Called when we got a page, either from a pool or newly allocated */
 > 348	int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
   349				    struct page *p, dma_addr_t **dma_addr,
   350				    unsigned long *num_pages, struct page ***pages)
   351	{
   352		unsigned int i;
   353		int r;
   354	
   355		if (*dma_addr) {
   356			r = ttm_pool_map(pool, order, p, dma_addr);
   357			if (r)
   358				return r;
   359		}
   360	
   361		*num_pages -= 1 << order;
   362		for (i = 1 << order; i; --i)
   363			*((*pages)++) = p++;
   364	
   365		return 0;
   366	}
   367
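
The warning matches Felix's review comment above: the helper is only called from within ttm_pool.c, so declaring it static resolves the missing prototype and documents the intent. The fixup is a one-liner (sketched here, assuming no header export is wanted):

-int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
+static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
 			    struct page *p, dma_addr_t **dma_addr,
 			    unsigned long *num_pages, struct page ***pages)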

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 21b61631f73a..cf15874cf380 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -344,6 +344,27 @@  static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
 	return p->private;
 }
 
+/* Called when we got a page, either from a pool or newly allocated */
+int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
+			    struct page *p, dma_addr_t **dma_addr,
+			    unsigned long *num_pages, struct page ***pages)
+{
+	unsigned int i;
+	int r;
+
+	if (*dma_addr) {
+		r = ttm_pool_map(pool, order, p, dma_addr);
+		if (r)
+			return r;
+	}
+
+	*num_pages -= 1 << order;
+	for (i = 1 << order; i; --i)
+		*((*pages)++) = p++;
+
+	return 0;
+}
+
 /**
  * ttm_pool_alloc - Fill a ttm_tt object
  *
@@ -385,45 +406,57 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
-		bool apply_caching = false;
 		struct ttm_pool_type *pt;
 
 		pt = ttm_pool_select_type(pool, tt->caching, order);
 		p = pt ? ttm_pool_type_take(pt) : NULL;
 		if (p) {
-			apply_caching = true;
-		} else {
-			p = ttm_pool_alloc_page(pool, gfp_flags, order);
-			if (p && PageHighMem(p))
-				apply_caching = true;
-		}
-
-		if (!p) {
-			if (order) {
-				--order;
-				continue;
-			}
-			r = -ENOMEM;
-			goto error_free_all;
-		}
-
-		if (apply_caching) {
 			r = ttm_pool_apply_caching(caching, pages,
 						   tt->caching);
 			if (r)
 				goto error_free_page;
-			caching = pages + (1 << order);
+
+			while (p) {
+				r = ttm_pool_page_allocated(pool, order, p,
+							    &dma_addr,
+							    &num_pages,
+							    &pages);
+				if (r)
+					goto error_free_page;
+
+				if (num_pages < (1 << order))
+					break;
+
+				p = ttm_pool_type_take(pt);
+			}
+			caching = pages;
 		}
 
-		if (dma_addr) {
-			r = ttm_pool_map(pool, order, p, &dma_addr);
+		while (num_pages >= (1 << order) &&
+		       (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
+
+			if (PageHighMem(p)) {
+				r = ttm_pool_apply_caching(caching, pages,
+							   tt->caching);
+				if (r)
+					goto error_free_page;
+			}
+			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
+						    &num_pages, &pages);
 			if (r)
 				goto error_free_page;
+			if (PageHighMem(p))
+				caching = pages;
 		}
 
-		num_pages -= 1 << order;
-		for (i = 1 << order; i; --i)
-			*(pages++) = p++;
+		if (!p) {
+			if (order) {
+				--order;
+				continue;
+			}
+			r = -ENOMEM;
+			goto error_free_all;
+		}
 	}
 
 	r = ttm_pool_apply_caching(caching, pages, tt->caching);
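
As a worked example of the order arithmetic in the loop header above: for num_pages = 10, __fls(10) = 3, so the first pass tries order 3 and takes 8 pages; the remaining 2 pages clamp order to __fls(2) = 1, and a single order-1 allocation completes the request. A minimal userspace sketch of just this progression (assuming every allocation succeeds; fls_idx() mimics the kernel's __fls(), and MAX_ORDER is hard-coded to its typical value at the time):

#include <stdio.h>

#define MAX_ORDER 11	/* assumed typical kernel value */

/* Index of the highest set bit, like the kernel's __fls(). */
static unsigned int fls_idx(unsigned long x)
{
	unsigned int i = 0;

	while (x >>= 1)
		i++;
	return i;
}

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long num_pages = 10;
	unsigned int order;

	/* Same order progression as the ttm_pool_alloc() for-loop. */
	for (order = min_u(MAX_ORDER - 1, fls_idx(num_pages));
	     num_pages;
	     order = min_u(order, fls_idx(num_pages))) {
		num_pages -= 1UL << order;
		printf("order %u: took %lu pages, %lu left\n",
		       order, 1UL << order, num_pages);
	}
	return 0;
}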