diff mbox series

[RFC,5/6] dma-buf: system_heap: Add pagepool support to system heap

Message ID 20200926042453.67517-6-john.stultz@linaro.org (mailing list archive)
State New, archived
Headers show
Series dma-buf: Performance improvements for system heap | expand

Commit Message

John Stultz Sept. 26, 2020, 4:24 a.m. UTC
Reuse/abuse the pagepool code from the network code to speed
up allocation performance.

This is similar to the ION pagepool usage, but tries to
utilize generic code instead of a custom implementation.

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/dma-buf/heaps/Kconfig       |  1 +
 drivers/dma-buf/heaps/system_heap.c | 32 +++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

Comments

Chris Goldsworthy Sept. 30, 2020, 4:46 a.m. UTC | #1
On 2020-09-25 21:24, John Stultz wrote:
> Reuse/abuse the pagepool code from the network code to speed
> up allocation performance.
> 
> This is similar to the ION pagepool usage, but tries to
> utilize generic code instead of a custom implementation.
> 
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Liam Mark <lmark@codeaurora.org>
> Cc: Laura Abbott <labbott@kernel.org>
> Cc: Brian Starkey <Brian.Starkey@arm.com>
> Cc: Hridya Valsaraju <hridya@google.com>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Sandeep Patil <sspatil@google.com>
> Cc: Ørjan Eide <orjan.eide@arm.com>
> Cc: Robin Murphy <robin.murphy@arm.com>
> Cc: Ezequiel Garcia <ezequiel@collabora.com>
> Cc: Simon Ser <contact@emersion.fr>
> Cc: James Jones <jajones@nvidia.com>
> Cc: linux-media@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: John Stultz <john.stultz@linaro.org>
> ---
>  drivers/dma-buf/heaps/Kconfig       |  1 +
>  drivers/dma-buf/heaps/system_heap.c | 32 +++++++++++++++++++++++++----
>  2 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/dma-buf/heaps/Kconfig 
> b/drivers/dma-buf/heaps/Kconfig
> index a5eef06c4226..f13cde4321b1 100644
> --- a/drivers/dma-buf/heaps/Kconfig
> +++ b/drivers/dma-buf/heaps/Kconfig
> @@ -1,6 +1,7 @@
>  config DMABUF_HEAPS_SYSTEM
>  	bool "DMA-BUF System Heap"
>  	depends on DMABUF_HEAPS
> +	select PAGE_POOL
>  	help
>  	  Choose this option to enable the system dmabuf heap. The system 
> heap
>  	  is backed by pages from the buddy allocator. If in doubt, say Y.
> diff --git a/drivers/dma-buf/heaps/system_heap.c
> b/drivers/dma-buf/heaps/system_heap.c
> index 882a632e9bb7..9f57b4c8ae69 100644
> --- a/drivers/dma-buf/heaps/system_heap.c
> +++ b/drivers/dma-buf/heaps/system_heap.c
> @@ -20,6 +20,7 @@
>  #include <linux/scatterlist.h>
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
> +#include <net/page_pool.h>
> 
>  struct dma_heap *sys_heap;
> 
> @@ -46,6 +47,7 @@ struct dma_heap_attachment {
>  static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, 
> LOW_ORDER_GFP};
>  static const unsigned int orders[] = {8, 4, 0};
>  #define NUM_ORDERS ARRAY_SIZE(orders)
> +struct page_pool *pools[NUM_ORDERS];
> 
>  static struct sg_table *dup_sg_table(struct sg_table *table)
>  {
> @@ -264,13 +266,17 @@ static void system_heap_dma_buf_release(struct
> dma_buf *dmabuf)
>  	struct system_heap_buffer *buffer = dmabuf->priv;
>  	struct sg_table *table;
>  	struct scatterlist *sg;
> -	int i;
> +	int i, j;
> 
>  	table = &buffer->sg_table;
>  	for_each_sg(table->sgl, sg, table->nents, i) {
>  		struct page *page = sg_page(sg);
> 
> -		__free_pages(page, compound_order(page));
> +		for (j = 0; j < NUM_ORDERS; j++) {
> +			if (compound_order(page) == orders[j])
> +				break;
> +		}
> +		page_pool_put_full_page(pools[j], page, false);
>  	}
>  	sg_free_table(table);
>  	kfree(buffer);
> @@ -300,8 +306,7 @@ static struct page
> *alloc_largest_available(unsigned long size,
>  			continue;
>  		if (max_order < orders[i])
>  			continue;
> -
> -		page = alloc_pages(order_flags[i], orders[i]);
> +		page = page_pool_alloc_pages(pools[i], order_flags[i]);
>  		if (!page)
>  			continue;
>  		return page;
> @@ -406,6 +411,25 @@ static const struct dma_heap_ops system_heap_ops = 
> {
>  static int system_heap_create(void)
>  {
>  	struct dma_heap_export_info exp_info;
> +	int i;
> +
> +	for (i = 0; i < NUM_ORDERS; i++) {
> +		struct page_pool_params pp;
> +
> +		memset(&pp, 0, sizeof(pp));
> +		pp.order = orders[i];
> +		pp.dma_dir = DMA_BIDIRECTIONAL;
> +		pools[i] = page_pool_create(&pp);
> +
> +		if (IS_ERR(pools[i])) {
> +			int j;
> +
> +			pr_err("%s: page pool creation failed!\n", __func__);
> +			for (j = 0; j < i; j++)
> +				page_pool_destroy(pools[j]);
> +			return PTR_ERR(pools[i]);
> +		}
> +	}
> 
>  	exp_info.name = "system";
>  	exp_info.ops = &system_heap_ops;

This is cool, I didn't know about this pooling code under /net/core.  
Nice and compact.
Chris Goldsworthy Oct. 1, 2020, 2:49 p.m. UTC | #2
On 2020-09-29 21:46, Chris Goldsworthy wrote:
> On 2020-09-25 21:24, John Stultz wrote:
>> Reuse/abuse the pagepool code from the network code to speed
>> up allocation performance.
>> 
>> This is similar to the ION pagepool usage, but tries to
>> utilize generic code instead of a custom implementation.
>> 
>> Cc: Sumit Semwal <sumit.semwal@linaro.org>
>> Cc: Liam Mark <lmark@codeaurora.org>
>> Cc: Laura Abbott <labbott@kernel.org>
>> Cc: Brian Starkey <Brian.Starkey@arm.com>
>> Cc: Hridya Valsaraju <hridya@google.com>
>> Cc: Suren Baghdasaryan <surenb@google.com>
>> Cc: Sandeep Patil <sspatil@google.com>
>> Cc: Ørjan Eide <orjan.eide@arm.com>
>> Cc: Robin Murphy <robin.murphy@arm.com>
>> Cc: Ezequiel Garcia <ezequiel@collabora.com>
>> Cc: Simon Ser <contact@emersion.fr>
>> Cc: James Jones <jajones@nvidia.com>
>> Cc: linux-media@vger.kernel.org
>> Cc: dri-devel@lists.freedesktop.org
>> Signed-off-by: John Stultz <john.stultz@linaro.org>
>> ---
>>  drivers/dma-buf/heaps/Kconfig       |  1 +
>>  drivers/dma-buf/heaps/system_heap.c | 32 
>> +++++++++++++++++++++++++----
>>  2 files changed, 29 insertions(+), 4 deletions(-)
>> 
>> diff --git a/drivers/dma-buf/heaps/Kconfig 
>> b/drivers/dma-buf/heaps/Kconfig
>> index a5eef06c4226..f13cde4321b1 100644
>> --- a/drivers/dma-buf/heaps/Kconfig
>> +++ b/drivers/dma-buf/heaps/Kconfig
>> @@ -1,6 +1,7 @@
>>  config DMABUF_HEAPS_SYSTEM
>>  	bool "DMA-BUF System Heap"
>>  	depends on DMABUF_HEAPS
>> +	select PAGE_POOL
>>  	help
>>  	  Choose this option to enable the system dmabuf heap. The system 
>> heap
>>  	  is backed by pages from the buddy allocator. If in doubt, say Y.
>> diff --git a/drivers/dma-buf/heaps/system_heap.c
>> b/drivers/dma-buf/heaps/system_heap.c
>> index 882a632e9bb7..9f57b4c8ae69 100644
>> --- a/drivers/dma-buf/heaps/system_heap.c
>> +++ b/drivers/dma-buf/heaps/system_heap.c
>> @@ -20,6 +20,7 @@
>>  #include <linux/scatterlist.h>
>>  #include <linux/slab.h>
>>  #include <linux/vmalloc.h>
>> +#include <net/page_pool.h>
>> 
>>  struct dma_heap *sys_heap;
>> 
>> @@ -46,6 +47,7 @@ struct dma_heap_attachment {
>>  static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, 
>> LOW_ORDER_GFP};
>>  static const unsigned int orders[] = {8, 4, 0};
>>  #define NUM_ORDERS ARRAY_SIZE(orders)
>> +struct page_pool *pools[NUM_ORDERS];
>> 
>>  static struct sg_table *dup_sg_table(struct sg_table *table)
>>  {
>> @@ -264,13 +266,17 @@ static void system_heap_dma_buf_release(struct
>> dma_buf *dmabuf)
>>  	struct system_heap_buffer *buffer = dmabuf->priv;
>>  	struct sg_table *table;
>>  	struct scatterlist *sg;
>> -	int i;
>> +	int i, j;
>> 
>>  	table = &buffer->sg_table;
>>  	for_each_sg(table->sgl, sg, table->nents, i) {
>>  		struct page *page = sg_page(sg);
>> 
>> -		__free_pages(page, compound_order(page));
>> +		for (j = 0; j < NUM_ORDERS; j++) {
>> +			if (compound_order(page) == orders[j])
>> +				break;
>> +		}
>> +		page_pool_put_full_page(pools[j], page, false);
>>  	}
>>  	sg_free_table(table);
>>  	kfree(buffer);
>> @@ -300,8 +306,7 @@ static struct page
>> *alloc_largest_available(unsigned long size,
>>  			continue;
>>  		if (max_order < orders[i])
>>  			continue;
>> -
>> -		page = alloc_pages(order_flags[i], orders[i]);
>> +		page = page_pool_alloc_pages(pools[i], order_flags[i]);
>>  		if (!page)
>>  			continue;
>>  		return page;
>> @@ -406,6 +411,25 @@ static const struct dma_heap_ops system_heap_ops 
>> = {
>>  static int system_heap_create(void)
>>  {
>>  	struct dma_heap_export_info exp_info;
>> +	int i;
>> +
>> +	for (i = 0; i < NUM_ORDERS; i++) {
>> +		struct page_pool_params pp;
>> +
>> +		memset(&pp, 0, sizeof(pp));
>> +		pp.order = orders[i];
>> +		pp.dma_dir = DMA_BIDIRECTIONAL;

Hey John,

Correct me if I'm wrong, but I think that in order for pp.dma_dir to be 
used in either page_pool_alloc_pages() or page_pool_put_full_page(), we 
need to at least have PP_FLAG_DMA_MAP set (to have 
page_pool_dma_sync_for_device() called, PP_FLAG_DMA_SYNC_DEV should also 
be set I think).  I think you'd also need to to have pp->dev set.  Are 
we setting dma_dir with the intention of doing the necessary CMOs before 
we start using the page?

Thanks,

Chris.

>> +		pools[i] = page_pool_create(&pp);
>> +
>> +		if (IS_ERR(pools[i])) {
>> +			int j;
>> +
>> +			pr_err("%s: page pool creation failed!\n", __func__);
>> +			for (j = 0; j < i; j++)
>> +				page_pool_destroy(pools[j]);
>> +			return PTR_ERR(pools[i]);
>> +		}
>> +	}
>> 
>>  	exp_info.name = "system";
>>  	exp_info.ops = &system_heap_ops;
> 
> This is cool, I didn't know about this pooling code under /net/core.
> Nice and compact.
John Stultz Oct. 1, 2020, 6:28 p.m. UTC | #3
On Thu, Oct 1, 2020 at 7:49 AM Chris Goldsworthy
<cgoldswo@codeaurora.org> wrote:
> On 2020-09-29 21:46, Chris Goldsworthy wrote:
> > On 2020-09-25 21:24, John Stultz wrote:
> >> Reuse/abuse the pagepool code from the network code to speed
> >> up allocation performance.
> >>
> >> This is similar to the ION pagepool usage, but tries to
> >> utilize generic code instead of a custom implementation.
> >>
> >> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> >> Cc: Liam Mark <lmark@codeaurora.org>
> >> Cc: Laura Abbott <labbott@kernel.org>
> >> Cc: Brian Starkey <Brian.Starkey@arm.com>
> >> Cc: Hridya Valsaraju <hridya@google.com>
> >> Cc: Suren Baghdasaryan <surenb@google.com>
> >> Cc: Sandeep Patil <sspatil@google.com>
> >> Cc: Ørjan Eide <orjan.eide@arm.com>
> >> Cc: Robin Murphy <robin.murphy@arm.com>
> >> Cc: Ezequiel Garcia <ezequiel@collabora.com>
> >> Cc: Simon Ser <contact@emersion.fr>
> >> Cc: James Jones <jajones@nvidia.com>
> >> Cc: linux-media@vger.kernel.org
> >> Cc: dri-devel@lists.freedesktop.org
> >> Signed-off-by: John Stultz <john.stultz@linaro.org>
> >> ---
> >>  drivers/dma-buf/heaps/Kconfig       |  1 +
> >>  drivers/dma-buf/heaps/system_heap.c | 32
> >> +++++++++++++++++++++++++----
> >>  2 files changed, 29 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/dma-buf/heaps/Kconfig
> >> b/drivers/dma-buf/heaps/Kconfig
> >> index a5eef06c4226..f13cde4321b1 100644
> >> --- a/drivers/dma-buf/heaps/Kconfig
> >> +++ b/drivers/dma-buf/heaps/Kconfig
> >> @@ -1,6 +1,7 @@
> >>  config DMABUF_HEAPS_SYSTEM
> >>      bool "DMA-BUF System Heap"
> >>      depends on DMABUF_HEAPS
> >> +    select PAGE_POOL
> >>      help
> >>        Choose this option to enable the system dmabuf heap. The system
> >> heap
> >>        is backed by pages from the buddy allocator. If in doubt, say Y.
> >> diff --git a/drivers/dma-buf/heaps/system_heap.c
> >> b/drivers/dma-buf/heaps/system_heap.c
> >> index 882a632e9bb7..9f57b4c8ae69 100644
> >> --- a/drivers/dma-buf/heaps/system_heap.c
> >> +++ b/drivers/dma-buf/heaps/system_heap.c
> >> @@ -20,6 +20,7 @@
> >>  #include <linux/scatterlist.h>
> >>  #include <linux/slab.h>
> >>  #include <linux/vmalloc.h>
> >> +#include <net/page_pool.h>
> >>
> >>  struct dma_heap *sys_heap;
> >>
> >> @@ -46,6 +47,7 @@ struct dma_heap_attachment {
> >>  static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP,
> >> LOW_ORDER_GFP};
> >>  static const unsigned int orders[] = {8, 4, 0};
> >>  #define NUM_ORDERS ARRAY_SIZE(orders)
> >> +struct page_pool *pools[NUM_ORDERS];
> >>
> >>  static struct sg_table *dup_sg_table(struct sg_table *table)
> >>  {
> >> @@ -264,13 +266,17 @@ static void system_heap_dma_buf_release(struct
> >> dma_buf *dmabuf)
> >>      struct system_heap_buffer *buffer = dmabuf->priv;
> >>      struct sg_table *table;
> >>      struct scatterlist *sg;
> >> -    int i;
> >> +    int i, j;
> >>
> >>      table = &buffer->sg_table;
> >>      for_each_sg(table->sgl, sg, table->nents, i) {
> >>              struct page *page = sg_page(sg);
> >>
> >> -            __free_pages(page, compound_order(page));
> >> +            for (j = 0; j < NUM_ORDERS; j++) {
> >> +                    if (compound_order(page) == orders[j])
> >> +                            break;
> >> +            }
> >> +            page_pool_put_full_page(pools[j], page, false);
> >>      }
> >>      sg_free_table(table);
> >>      kfree(buffer);
> >> @@ -300,8 +306,7 @@ static struct page
> >> *alloc_largest_available(unsigned long size,
> >>                      continue;
> >>              if (max_order < orders[i])
> >>                      continue;
> >> -
> >> -            page = alloc_pages(order_flags[i], orders[i]);
> >> +            page = page_pool_alloc_pages(pools[i], order_flags[i]);
> >>              if (!page)
> >>                      continue;
> >>              return page;
> >> @@ -406,6 +411,25 @@ static const struct dma_heap_ops system_heap_ops
> >> = {
> >>  static int system_heap_create(void)
> >>  {
> >>      struct dma_heap_export_info exp_info;
> >> +    int i;
> >> +
> >> +    for (i = 0; i < NUM_ORDERS; i++) {
> >> +            struct page_pool_params pp;
> >> +
> >> +            memset(&pp, 0, sizeof(pp));
> >> +            pp.order = orders[i];
> >> +            pp.dma_dir = DMA_BIDIRECTIONAL;
>
> Hey John,
>
> Correct me if I'm wrong, but I think that in order for pp.dma_dir to be
> used in either page_pool_alloc_pages() or page_pool_put_full_page(), we
> need to at least have PP_FLAG_DMA_MAP set (to have
> page_pool_dma_sync_for_device() called, PP_FLAG_DMA_SYNC_DEV should also
> be set I think).  I think you'd also need to to have pp->dev set.  Are
> we setting dma_dir with the intention of doing the necessary CMOs before
> we start using the page?

Looking, I think my setting of the dma_dir there on the pool is
unnecessary (and as you point out, it doesn't have much effect as long
as the PP_FLAG_DMA_MAP isn't set).
I'm really only using the pagepool as a page cache, and the dmabuf ops
are still used for mapping and syncing operations.

thanks
-john
John Stultz Oct. 1, 2020, 10:07 p.m. UTC | #4
On Tue, Sep 29, 2020 at 9:46 PM Chris Goldsworthy
<cgoldswo@codeaurora.org> wrote:
>
> On 2020-09-25 21:24, John Stultz wrote:
> > Reuse/abuse the pagepool code from the network code to speed
> > up allocation performance.
> >
> > This is similar to the ION pagepool usage, but tries to
> > utilize generic code instead of a custom implementation.
> >
> > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > Cc: Liam Mark <lmark@codeaurora.org>
> > Cc: Laura Abbott <labbott@kernel.org>
> > Cc: Brian Starkey <Brian.Starkey@arm.com>
> > Cc: Hridya Valsaraju <hridya@google.com>
> > Cc: Suren Baghdasaryan <surenb@google.com>
> > Cc: Sandeep Patil <sspatil@google.com>
> > Cc: Ørjan Eide <orjan.eide@arm.com>
> > Cc: Robin Murphy <robin.murphy@arm.com>
> > Cc: Ezequiel Garcia <ezequiel@collabora.com>
> > Cc: Simon Ser <contact@emersion.fr>
> > Cc: James Jones <jajones@nvidia.com>
> > Cc: linux-media@vger.kernel.org
> > Cc: dri-devel@lists.freedesktop.org
> > Signed-off-by: John Stultz <john.stultz@linaro.org>
> > ---
> >  drivers/dma-buf/heaps/Kconfig       |  1 +
> >  drivers/dma-buf/heaps/system_heap.c | 32 +++++++++++++++++++++++++----
> >  2 files changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/dma-buf/heaps/Kconfig
> > b/drivers/dma-buf/heaps/Kconfig
> > index a5eef06c4226..f13cde4321b1 100644
> > --- a/drivers/dma-buf/heaps/Kconfig
> > +++ b/drivers/dma-buf/heaps/Kconfig
> > @@ -1,6 +1,7 @@
> >  config DMABUF_HEAPS_SYSTEM
> >       bool "DMA-BUF System Heap"
> >       depends on DMABUF_HEAPS
> > +     select PAGE_POOL
> >       help
> >         Choose this option to enable the system dmabuf heap. The system
> > heap
> >         is backed by pages from the buddy allocator. If in doubt, say Y.
> > diff --git a/drivers/dma-buf/heaps/system_heap.c
> > b/drivers/dma-buf/heaps/system_heap.c
> > index 882a632e9bb7..9f57b4c8ae69 100644
> > --- a/drivers/dma-buf/heaps/system_heap.c
> > +++ b/drivers/dma-buf/heaps/system_heap.c
> > @@ -20,6 +20,7 @@
> >  #include <linux/scatterlist.h>
> >  #include <linux/slab.h>
> >  #include <linux/vmalloc.h>
> > +#include <net/page_pool.h>
> >
> >  struct dma_heap *sys_heap;
> >
> > @@ -46,6 +47,7 @@ struct dma_heap_attachment {
> >  static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP,
> > LOW_ORDER_GFP};
> >  static const unsigned int orders[] = {8, 4, 0};
> >  #define NUM_ORDERS ARRAY_SIZE(orders)
> > +struct page_pool *pools[NUM_ORDERS];
> >
> >  static struct sg_table *dup_sg_table(struct sg_table *table)
> >  {
> > @@ -264,13 +266,17 @@ static void system_heap_dma_buf_release(struct
> > dma_buf *dmabuf)
> >       struct system_heap_buffer *buffer = dmabuf->priv;
> >       struct sg_table *table;
> >       struct scatterlist *sg;
> > -     int i;
> > +     int i, j;
> >
> >       table = &buffer->sg_table;
> >       for_each_sg(table->sgl, sg, table->nents, i) {
> >               struct page *page = sg_page(sg);
> >
> > -             __free_pages(page, compound_order(page));
> > +             for (j = 0; j < NUM_ORDERS; j++) {
> > +                     if (compound_order(page) == orders[j])
> > +                             break;
> > +             }
> > +             page_pool_put_full_page(pools[j], page, false);
> >       }
> >       sg_free_table(table);
> >       kfree(buffer);
> > @@ -300,8 +306,7 @@ static struct page
> > *alloc_largest_available(unsigned long size,
> >                       continue;
> >               if (max_order < orders[i])
> >                       continue;
> > -
> > -             page = alloc_pages(order_flags[i], orders[i]);
> > +             page = page_pool_alloc_pages(pools[i], order_flags[i]);
> >               if (!page)
> >                       continue;
> >               return page;
> > @@ -406,6 +411,25 @@ static const struct dma_heap_ops system_heap_ops =
> > {
> >  static int system_heap_create(void)
> >  {
> >       struct dma_heap_export_info exp_info;
> > +     int i;
> > +
> > +     for (i = 0; i < NUM_ORDERS; i++) {
> > +             struct page_pool_params pp;
> > +
> > +             memset(&pp, 0, sizeof(pp));
> > +             pp.order = orders[i];
> > +             pp.dma_dir = DMA_BIDIRECTIONAL;
> > +             pools[i] = page_pool_create(&pp);
> > +
> > +             if (IS_ERR(pools[i])) {
> > +                     int j;
> > +
> > +                     pr_err("%s: page pool creation failed!\n", __func__);
> > +                     for (j = 0; j < i; j++)
> > +                             page_pool_destroy(pools[j]);
> > +                     return PTR_ERR(pools[i]);
> > +             }
> > +     }
> >
> >       exp_info.name = "system";
> >       exp_info.ops = &system_heap_ops;
>
> This is cool, I didn't know about this pooling code under /net/core.
> Nice and compact.

Oh, bummer. I just realized when allocating w/ __GFP_ZERO from the
page-pool, the logic doesn't actually clear pages when pulling from
the cache.
So unfortunately this is what accounts for much of the performance
benefit I was seeing with this approach, so I'll have to retract my
claim on the performance gain with this. :(

I've got a first pass at zeroing the pages we put into the pool, but
the numbers are not so great just yet so I've got some further work to
do.

thanks
-john
diff mbox series

Patch

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c4226..f13cde4321b1 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -1,6 +1,7 @@ 
 config DMABUF_HEAPS_SYSTEM
 	bool "DMA-BUF System Heap"
 	depends on DMABUF_HEAPS
+	select PAGE_POOL
 	help
 	  Choose this option to enable the system dmabuf heap. The system heap
 	  is backed by pages from the buddy allocator. If in doubt, say Y.
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 882a632e9bb7..9f57b4c8ae69 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -20,6 +20,7 @@ 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <net/page_pool.h>
 
 struct dma_heap *sys_heap;
 
@@ -46,6 +47,7 @@  struct dma_heap_attachment {
 static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP};
 static const unsigned int orders[] = {8, 4, 0};
 #define NUM_ORDERS ARRAY_SIZE(orders)
+struct page_pool *pools[NUM_ORDERS];
 
 static struct sg_table *dup_sg_table(struct sg_table *table)
 {
@@ -264,13 +266,17 @@  static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 	struct system_heap_buffer *buffer = dmabuf->priv;
 	struct sg_table *table;
 	struct scatterlist *sg;
-	int i;
+	int i, j;
 
 	table = &buffer->sg_table;
 	for_each_sg(table->sgl, sg, table->nents, i) {
 		struct page *page = sg_page(sg);
 
-		__free_pages(page, compound_order(page));
+		for (j = 0; j < NUM_ORDERS; j++) {
+			if (compound_order(page) == orders[j])
+				break;
+		}
+		page_pool_put_full_page(pools[j], page, false);
 	}
 	sg_free_table(table);
 	kfree(buffer);
@@ -300,8 +306,7 @@  static struct page *alloc_largest_available(unsigned long size,
 			continue;
 		if (max_order < orders[i])
 			continue;
-
-		page = alloc_pages(order_flags[i], orders[i]);
+		page = page_pool_alloc_pages(pools[i], order_flags[i]);
 		if (!page)
 			continue;
 		return page;
@@ -406,6 +411,25 @@  static const struct dma_heap_ops system_heap_ops = {
 static int system_heap_create(void)
 {
 	struct dma_heap_export_info exp_info;
+	int i;
+
+	for (i = 0; i < NUM_ORDERS; i++) {
+		struct page_pool_params pp;
+
+		memset(&pp, 0, sizeof(pp));
+		pp.order = orders[i];
+		pp.dma_dir = DMA_BIDIRECTIONAL;
+		pools[i] = page_pool_create(&pp);
+
+		if (IS_ERR(pools[i])) {
+			int j;
+
+			pr_err("%s: page pool creation failed!\n", __func__);
+			for (j = 0; j < i; j++)
+				page_pool_destroy(pools[j]);
+			return PTR_ERR(pools[i]);
+		}
+	}
 
 	exp_info.name = "system";
 	exp_info.ops = &system_heap_ops;