diff mbox series

[RFC,v6,3/7] drm: ttm_pool: Rework ttm_pool_free_page to allow us to use it as a function pointer

Message ID 20210205080621.3102035-4-john.stultz@linaro.org (mailing list archive)
State New, archived
Headers show
Series Generic page pool & deferred freeing for system dmabuf heap | expand

Commit Message

John Stultz Feb. 5, 2021, 8:06 a.m. UTC
This refactors ttm_pool_free_page(), and by adding extra entries
to ttm_pool_page_dat, we then use it for all allocations, which
allows us to simplify the arguments needed to be passed to
ttm_pool_free_page().

This is critical for allowing the free function to be called
by the sharable drm_page_pool logic.

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 60 ++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 28 deletions(-)

Comments

Christian König Feb. 5, 2021, 8:28 a.m. UTC | #1
Am 05.02.21 um 09:06 schrieb John Stultz:
> This refactors ttm_pool_free_page(), and by adding extra entries
> to ttm_pool_page_dat, we then use it for all allocations, which
> allows us to simplify the arguments needed to be passed to
> ttm_pool_free_page().

This is a clear NAK since the peer page data is just a workaround for 
the DMA-API hack to grab pages from there.

Adding this to all pages would increase the memory footprint drastically.

christian.

>
> This is critical for allowing the free function to be called
> by the sharable drm_page_pool logic.
>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Liam Mark <lmark@codeaurora.org>
> Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
> Cc: Laura Abbott <labbott@kernel.org>
> Cc: Brian Starkey <Brian.Starkey@arm.com>
> Cc: Hridya Valsaraju <hridya@google.com>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Sandeep Patil <sspatil@google.com>
> Cc: Daniel Mentz <danielmentz@google.com>
> Cc: Ørjan Eide <orjan.eide@arm.com>
> Cc: Robin Murphy <robin.murphy@arm.com>
> Cc: Ezequiel Garcia <ezequiel@collabora.com>
> Cc: Simon Ser <contact@emersion.fr>
> Cc: James Jones <jajones@nvidia.com>
> Cc: linux-media@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: John Stultz <john.stultz@linaro.org>
> ---
>   drivers/gpu/drm/ttm/ttm_pool.c | 60 ++++++++++++++++++----------------
>   1 file changed, 32 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index c0274e256be3..eca36678f967 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -44,10 +44,14 @@
>   /**
>    * struct ttm_pool_page_dat - Helper object for coherent DMA mappings
>    *
> + * @pool: ttm_pool pointer the page was allocated by
> + * @caching: the caching value the allocated page was configured for
>    * @addr: original DMA address returned for the mapping
>    * @vaddr: original vaddr return for the mapping and order in the lower bits
>    */
>   struct ttm_pool_page_dat {
> +	struct ttm_pool *pool;
> +	enum ttm_caching caching;
>   	dma_addr_t addr;
>   	unsigned long vaddr;
>   };
> @@ -71,13 +75,20 @@ static struct shrinker mm_shrinker;
>   
>   /* Allocate pages of size 1 << order with the given gfp_flags */
>   static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> -					unsigned int order)
> +					unsigned int order, enum ttm_caching caching)
>   {
>   	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
>   	struct ttm_pool_page_dat *dat;
>   	struct page *p;
>   	void *vaddr;
>   
> +	dat = kmalloc(sizeof(*dat), GFP_KERNEL);
> +	if (!dat)
> +		return NULL;
> +
> +	dat->pool = pool;
> +	dat->caching = caching;
> +
>   	/* Don't set the __GFP_COMP flag for higher order allocations.
>   	 * Mapping pages directly into an userspace process and calling
>   	 * put_page() on a TTM allocated page is illegal.
> @@ -88,15 +99,13 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
>   
>   	if (!pool->use_dma_alloc) {
>   		p = alloc_pages(gfp_flags, order);
> -		if (p)
> -			p->private = order;
> +		if (!p)
> +			goto error_free;
> +		dat->vaddr = order;
> +		p->private = (unsigned long)dat;
>   		return p;
>   	}
>   
> -	dat = kmalloc(sizeof(*dat), GFP_KERNEL);
> -	if (!dat)
> -		return NULL;
> -
>   	if (order)
>   		attr |= DMA_ATTR_NO_WARN;
>   
> @@ -123,34 +132,34 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
>   }
>   
>   /* Reset the caching and pages of size 1 << order */
> -static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
> -			       unsigned int order, struct page *p)
> +static int ttm_pool_free_page(struct page *p, unsigned int order)
>   {
>   	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
> -	struct ttm_pool_page_dat *dat;
> +	struct ttm_pool_page_dat *dat = (void *)p->private;
>   	void *vaddr;
>   
>   #ifdef CONFIG_X86
>   	/* We don't care that set_pages_wb is inefficient here. This is only
>   	 * used when we have to shrink and CPU overhead is irrelevant then.
>   	 */
> -	if (caching != ttm_cached && !PageHighMem(p))
> +	if (dat->caching != ttm_cached && !PageHighMem(p))
>   		set_pages_wb(p, 1 << order);
>   #endif
>   
> -	if (!pool || !pool->use_dma_alloc) {
> +	if (!dat->pool || !dat->pool->use_dma_alloc) {
>   		__free_pages(p, order);
> -		return;
> +		goto out;
>   	}
>   
>   	if (order)
>   		attr |= DMA_ATTR_NO_WARN;
>   
> -	dat = (void *)p->private;
>   	vaddr = (void *)(dat->vaddr & PAGE_MASK);
> -	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dat->addr,
> +	dma_free_attrs(dat->pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dat->addr,
>   		       attr);
> +out:
>   	kfree(dat);
> +	return 1 << order;
>   }
>   
>   /* Apply a new caching to an array of pages */
> @@ -264,7 +273,7 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
>   	mutex_unlock(&shrinker_lock);
>   
>   	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
> -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> +		ttm_pool_free_page(p, pt->order);
>   }
>   
>   /* Return the pool_type to use for the given caching and order */
> @@ -307,7 +316,7 @@ static unsigned int ttm_pool_shrink(void)
>   
>   	p = ttm_pool_type_take(pt);
>   	if (p) {
> -		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> +		ttm_pool_free_page(p, pt->order);
>   		num_freed = 1 << pt->order;
>   	} else {
>   		num_freed = 0;
> @@ -322,13 +331,9 @@ static unsigned int ttm_pool_shrink(void)
>   /* Return the allocation order based for a page */
>   static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
>   {
> -	if (pool->use_dma_alloc) {
> -		struct ttm_pool_page_dat *dat = (void *)p->private;
> -
> -		return dat->vaddr & ~PAGE_MASK;
> -	}
> +	struct ttm_pool_page_dat *dat = (void *)p->private;
>   
> -	return p->private;
> +	return dat->vaddr & ~PAGE_MASK;
>   }
>   
>   /**
> @@ -379,7 +384,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>   		if (p) {
>   			apply_caching = true;
>   		} else {
> -			p = ttm_pool_alloc_page(pool, gfp_flags, order);
> +			p = ttm_pool_alloc_page(pool, gfp_flags, order, tt->caching);
>   			if (p && PageHighMem(p))
>   				apply_caching = true;
>   		}
> @@ -428,13 +433,13 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>   	ttm_mem_global_free_page(&ttm_mem_glob, p, (1 << order) * PAGE_SIZE);
>   
>   error_free_page:
> -	ttm_pool_free_page(pool, tt->caching, order, p);
> +	ttm_pool_free_page(p, order);
>   
>   error_free_all:
>   	num_pages = tt->num_pages - num_pages;
>   	for (i = 0; i < num_pages; ) {
>   		order = ttm_pool_page_order(pool, tt->pages[i]);
> -		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
> +		ttm_pool_free_page(tt->pages[i], order);
>   		i += 1 << order;
>   	}
>   
> @@ -470,8 +475,7 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
>   		if (pt)
>   			ttm_pool_type_give(pt, tt->pages[i]);
>   		else
> -			ttm_pool_free_page(pool, tt->caching, order,
> -					   tt->pages[i]);
> +			ttm_pool_free_page(tt->pages[i], order);
>   
>   		i += num_pages;
>   	}
John Stultz Feb. 5, 2021, 7:47 p.m. UTC | #2
On Fri, Feb 5, 2021 at 12:28 AM Christian König
<christian.koenig@amd.com> wrote:
> Am 05.02.21 um 09:06 schrieb John Stultz:
> > This refactors ttm_pool_free_page(), and by adding extra entries
> > to ttm_pool_page_dat, we then use it for all allocations, which
> > allows us to simplify the arguments needed to be passed to
> > ttm_pool_free_page().
>
> This is a clear NAK since the peer page data is just a workaround for
> the DMA-API hack to grab pages from there.
>
> Adding this to all pages would increase the memory footprint drastically.

Yea, that's a good point!  Hrm... bummer. I'll have to see if there's
some other way I can get the needed context for the free from the
generic page-pool side.

Thanks so much for the review!
-john
Christian König Feb. 9, 2021, 12:13 p.m. UTC | #3
Am 05.02.21 um 20:47 schrieb John Stultz:
> On Fri, Feb 5, 2021 at 12:28 AM Christian König
> <christian.koenig@amd.com> wrote:
>> Am 05.02.21 um 09:06 schrieb John Stultz:
>>> This refactors ttm_pool_free_page(), and by adding extra entries
>>> to ttm_pool_page_dat, we then use it for all allocations, which
>>> allows us to simplify the arguments needed to be passed to
>>> ttm_pool_free_page().
>> This is a clear NAK since the peer page data is just a workaround for
>> the DMA-API hack to grab pages from there.
>>
>> Adding this to all pages would increase the memory footprint drastically.
> Yea, that's a good point!  Hrm... bummer. I'll have to see if there's
> some other way I can get the needed context for the free from the
> generic page-pool side.

What exactly is the problem here? As far as I can see we just have the 
lru entry (list_head) and the pool.

How the lru is cast to the page can be completely pool implementation 
specific.

Regards,
Christian.

>
> Thanks so much for the review!
> -john
John Stultz Feb. 9, 2021, 5:39 p.m. UTC | #4
On Tue, Feb 9, 2021 at 4:14 AM Christian König <christian.koenig@amd.com> wrote:
> Am 05.02.21 um 20:47 schrieb John Stultz:
> > On Fri, Feb 5, 2021 at 12:28 AM Christian König
> > <christian.koenig@amd.com> wrote:
> >> Adding this to all pages would increase the memory footprint drastically.
> > Yea, that's a good point!  Hrm... bummer. I'll have to see if there's
> > some other way I can get the needed context for the free from the
> > generic page-pool side.
>
> What exactly is the problem here?

Me, usually. :)

> As far as I can see we just have the
> lru entry (list_head) and the pool.

Yea, I reworked it to an embedded drm_page_pool struct, but that is
mostly a list_head.

> How the lru is cast to the page can be completely pool implementation
> specific.

Yea, I had it do container_of(), just haven't gotten around to sending
it out yet.

Thanks so much for the feedback and ideas!

thanks
-john
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index c0274e256be3..eca36678f967 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -44,10 +44,14 @@ 
 /**
  * struct ttm_pool_page_dat - Helper object for coherent DMA mappings
  *
+ * @pool: ttm_pool pointer the page was allocated by
+ * @caching: the caching value the allocated page was configured for
  * @addr: original DMA address returned for the mapping
  * @vaddr: original vaddr return for the mapping and order in the lower bits
  */
 struct ttm_pool_page_dat {
+	struct ttm_pool *pool;
+	enum ttm_caching caching;
 	dma_addr_t addr;
 	unsigned long vaddr;
 };
@@ -71,13 +75,20 @@  static struct shrinker mm_shrinker;
 
 /* Allocate pages of size 1 << order with the given gfp_flags */
 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
-					unsigned int order)
+					unsigned int order, enum ttm_caching caching)
 {
 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
 	struct ttm_pool_page_dat *dat;
 	struct page *p;
 	void *vaddr;
 
+	dat = kmalloc(sizeof(*dat), GFP_KERNEL);
+	if (!dat)
+		return NULL;
+
+	dat->pool = pool;
+	dat->caching = caching;
+
 	/* Don't set the __GFP_COMP flag for higher order allocations.
 	 * Mapping pages directly into an userspace process and calling
 	 * put_page() on a TTM allocated page is illegal.
@@ -88,15 +99,13 @@  static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 
 	if (!pool->use_dma_alloc) {
 		p = alloc_pages(gfp_flags, order);
-		if (p)
-			p->private = order;
+		if (!p)
+			goto error_free;
+		dat->vaddr = order;
+		p->private = (unsigned long)dat;
 		return p;
 	}
 
-	dat = kmalloc(sizeof(*dat), GFP_KERNEL);
-	if (!dat)
-		return NULL;
-
 	if (order)
 		attr |= DMA_ATTR_NO_WARN;
 
@@ -123,34 +132,34 @@  static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
 }
 
 /* Reset the caching and pages of size 1 << order */
-static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
-			       unsigned int order, struct page *p)
+static int ttm_pool_free_page(struct page *p, unsigned int order)
 {
 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
-	struct ttm_pool_page_dat *dat;
+	struct ttm_pool_page_dat *dat = (void *)p->private;
 	void *vaddr;
 
 #ifdef CONFIG_X86
 	/* We don't care that set_pages_wb is inefficient here. This is only
 	 * used when we have to shrink and CPU overhead is irrelevant then.
 	 */
-	if (caching != ttm_cached && !PageHighMem(p))
+	if (dat->caching != ttm_cached && !PageHighMem(p))
 		set_pages_wb(p, 1 << order);
 #endif
 
-	if (!pool || !pool->use_dma_alloc) {
+	if (!dat->pool || !dat->pool->use_dma_alloc) {
 		__free_pages(p, order);
-		return;
+		goto out;
 	}
 
 	if (order)
 		attr |= DMA_ATTR_NO_WARN;
 
-	dat = (void *)p->private;
 	vaddr = (void *)(dat->vaddr & PAGE_MASK);
-	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dat->addr,
+	dma_free_attrs(dat->pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dat->addr,
 		       attr);
+out:
 	kfree(dat);
+	return 1 << order;
 }
 
 /* Apply a new caching to an array of pages */
@@ -264,7 +273,7 @@  static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 	mutex_unlock(&shrinker_lock);
 
 	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
-		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
+		ttm_pool_free_page(p, pt->order);
 }
 
 /* Return the pool_type to use for the given caching and order */
@@ -307,7 +316,7 @@  static unsigned int ttm_pool_shrink(void)
 
 	p = ttm_pool_type_take(pt);
 	if (p) {
-		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
+		ttm_pool_free_page(p, pt->order);
 		num_freed = 1 << pt->order;
 	} else {
 		num_freed = 0;
@@ -322,13 +331,9 @@  static unsigned int ttm_pool_shrink(void)
 /* Return the allocation order based for a page */
 static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
 {
-	if (pool->use_dma_alloc) {
-		struct ttm_pool_page_dat *dat = (void *)p->private;
-
-		return dat->vaddr & ~PAGE_MASK;
-	}
+	struct ttm_pool_page_dat *dat = (void *)p->private;
 
-	return p->private;
+	return dat->vaddr & ~PAGE_MASK;
 }
 
 /**
@@ -379,7 +384,7 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		if (p) {
 			apply_caching = true;
 		} else {
-			p = ttm_pool_alloc_page(pool, gfp_flags, order);
+			p = ttm_pool_alloc_page(pool, gfp_flags, order, tt->caching);
 			if (p && PageHighMem(p))
 				apply_caching = true;
 		}
@@ -428,13 +433,13 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	ttm_mem_global_free_page(&ttm_mem_glob, p, (1 << order) * PAGE_SIZE);
 
 error_free_page:
-	ttm_pool_free_page(pool, tt->caching, order, p);
+	ttm_pool_free_page(p, order);
 
 error_free_all:
 	num_pages = tt->num_pages - num_pages;
 	for (i = 0; i < num_pages; ) {
 		order = ttm_pool_page_order(pool, tt->pages[i]);
-		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
+		ttm_pool_free_page(tt->pages[i], order);
 		i += 1 << order;
 	}
 
@@ -470,8 +475,7 @@  void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
 		if (pt)
 			ttm_pool_type_give(pt, tt->pages[i]);
 		else
-			ttm_pool_free_page(pool, tt->caching, order,
-					   tt->pages[i]);
+			ttm_pool_free_page(tt->pages[i], order);
 
 		i += num_pages;
 	}