diff mbox series

[net-next,v2,1/4] mm: page_frag: Introduce page_frag_alloc_align()

Message ID 20210131074426.44154-2-haokexin@gmail.com (mailing list archive)
State New, archived
Headers show
Series net: Avoid the memory waste in some Ethernet drivers | expand

Commit Message

Kevin Hao Jan. 31, 2021, 7:44 a.m. UTC
In the current implementation of page_frag_alloc(), it doesn't have
any align guarantee for the returned buffer address. But for some
hardwares they do require the DMA buffer to be aligned correctly,
so we would have to use some workarounds like below if the buffers
allocated by the page_frag_alloc() are used by these hardwares for
DMA.
    buf = page_frag_alloc(really_needed_size + align);
    buf = PTR_ALIGN(buf, align);

These codes seems ugly and would waste a lot of memories if the buffers
are used in a network driver for the TX/RX. So introduce
page_frag_alloc_align() to make sure that an aligned buffer address is
returned.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
v2: 
  - Inline page_frag_alloc()
  - Adopt Vlastimil's suggestion and add his Acked-by
 
 include/linux/gfp.h | 12 ++++++++++--
 mm/page_alloc.c     |  8 +++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

Comments

Ioana Ciornei Feb. 2, 2021, 11:36 a.m. UTC | #1
On Sun, Jan 31, 2021 at 03:44:23PM +0800, Kevin Hao wrote:
> In the current implementation of page_frag_alloc(), it doesn't have
> any align guarantee for the returned buffer address. But for some
> hardwares they do require the DMA buffer to be aligned correctly,
> so we would have to use some workarounds like below if the buffers
> allocated by the page_frag_alloc() are used by these hardwares for
> DMA.
>     buf = page_frag_alloc(really_needed_size + align);
>     buf = PTR_ALIGN(buf, align);
> 
> These codes seems ugly and would waste a lot of memories if the buffers
> are used in a network driver for the TX/RX.

Isn't the memory wasted even with this change?

I am not familiar with the frag allocator so I might be missing
something, but from what I understood each page_frag_cache keeps only
the offset inside the current page being allocated, offset which you
ALIGN_DOWN() to match the alignment requirement. I don't see how that
memory between the non-aligned and aligned offset is going to be used
again before the entire page is freed.

> So introduce
> page_frag_alloc_align() to make sure that an aligned buffer address is
> returned.
> 
> Signed-off-by: Kevin Hao <haokexin@gmail.com>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> v2: 
>   - Inline page_frag_alloc()
>   - Adopt Vlastimil's suggestion and add his Acked-by
>  
>  include/linux/gfp.h | 12 ++++++++++--
>  mm/page_alloc.c     |  8 +++++---
>  2 files changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 6e479e9c48ce..39f4b3070d09 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -583,8 +583,16 @@ extern void free_pages(unsigned long addr, unsigned int order);
>  
>  struct page_frag_cache;
>  extern void __page_frag_cache_drain(struct page *page, unsigned int count);
> -extern void *page_frag_alloc(struct page_frag_cache *nc,
> -			     unsigned int fragsz, gfp_t gfp_mask);
> +extern void *page_frag_alloc_align(struct page_frag_cache *nc,
> +				   unsigned int fragsz, gfp_t gfp_mask,
> +				   int align);
> +
> +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> +			     unsigned int fragsz, gfp_t gfp_mask)
> +{
> +	return page_frag_alloc_align(nc, fragsz, gfp_mask, 0);
> +}
> +
>  extern void page_frag_free(void *addr);
>  
>  #define __free_page(page) __free_pages((page), 0)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 519a60d5b6f7..4667e7b6993b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5137,8 +5137,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
>  }
>  EXPORT_SYMBOL(__page_frag_cache_drain);
>  
> -void *page_frag_alloc(struct page_frag_cache *nc,
> -		      unsigned int fragsz, gfp_t gfp_mask)
> +void *page_frag_alloc_align(struct page_frag_cache *nc,
> +		      unsigned int fragsz, gfp_t gfp_mask, int align)
>  {
>  	unsigned int size = PAGE_SIZE;
>  	struct page *page;
> @@ -5190,11 +5190,13 @@ void *page_frag_alloc(struct page_frag_cache *nc,
>  	}
>  
>  	nc->pagecnt_bias--;
> +	if (align)
> +		offset = ALIGN_DOWN(offset, align);
>  	nc->offset = offset;
>  
>  	return nc->va + offset;
>  }
> -EXPORT_SYMBOL(page_frag_alloc);
> +EXPORT_SYMBOL(page_frag_alloc_align);
>  
>  /*
>   * Frees a page fragment allocated out of either a compound or order 0 page.
> -- 
> 2.29.2
>
Vlastimil Babka Feb. 2, 2021, 11:48 a.m. UTC | #2
On 2/2/21 12:36 PM, Ioana Ciornei wrote:
> On Sun, Jan 31, 2021 at 03:44:23PM +0800, Kevin Hao wrote:
>> In the current implementation of page_frag_alloc(), it doesn't have
>> any align guarantee for the returned buffer address. But for some
>> hardwares they do require the DMA buffer to be aligned correctly,
>> so we would have to use some workarounds like below if the buffers
>> allocated by the page_frag_alloc() are used by these hardwares for
>> DMA.
>>     buf = page_frag_alloc(really_needed_size + align);
>>     buf = PTR_ALIGN(buf, align);
>> 
>> These codes seems ugly and would waste a lot of memories if the buffers
>> are used in a network driver for the TX/RX.
> 
> Isn't the memory wasted even with this change?

Yes, but less of it. Not always full amount of align, but up to it. Perhaps even
zero.

> I am not familiar with the frag allocator so I might be missing
> something, but from what I understood each page_frag_cache keeps only
> the offset inside the current page being allocated, offset which you
> ALIGN_DOWN() to match the alignment requirement. I don't see how that
> memory between the non-aligned and aligned offset is going to be used
> again before the entire page is freed.

True, thath's how page_frag is designed. The align amounts would be most likely
too small to be usable anyway.
Ioana Ciornei Feb. 2, 2021, 12:31 p.m. UTC | #3
On Tue, Feb 02, 2021 at 12:48:28PM +0100, Vlastimil Babka wrote:
> On 2/2/21 12:36 PM, Ioana Ciornei wrote:
> > On Sun, Jan 31, 2021 at 03:44:23PM +0800, Kevin Hao wrote:
> >> In the current implementation of page_frag_alloc(), it doesn't have
> >> any align guarantee for the returned buffer address. But for some
> >> hardwares they do require the DMA buffer to be aligned correctly,
> >> so we would have to use some workarounds like below if the buffers
> >> allocated by the page_frag_alloc() are used by these hardwares for
> >> DMA.
> >>     buf = page_frag_alloc(really_needed_size + align);
> >>     buf = PTR_ALIGN(buf, align);
> >> 
> >> These codes seems ugly and would waste a lot of memories if the buffers
> >> are used in a network driver for the TX/RX.
> > 
> > Isn't the memory wasted even with this change?
> 
> Yes, but less of it. Not always full amount of align, but up to it. Perhaps even
> zero.

Indeed, the worst case is still there but we gain by not allocating the
full 'size + align' all the time.

Thanks.

> 
> > I am not familiar with the frag allocator so I might be missing
> > something, but from what I understood each page_frag_cache keeps only
> > the offset inside the current page being allocated, offset which you
> > ALIGN_DOWN() to match the alignment requirement. I don't see how that
> > memory between the non-aligned and aligned offset is going to be used
> > again before the entire page is freed.
> 
> True, thath's how page_frag is designed. The align amounts would be most likely
> too small to be usable anyway.
Alexander Duyck Feb. 2, 2021, 4:19 p.m. UTC | #4
On Sat, Jan 30, 2021 at 11:54 PM Kevin Hao <haokexin@gmail.com> wrote:
>
> In the current implementation of page_frag_alloc(), it doesn't have
> any align guarantee for the returned buffer address. But for some
> hardwares they do require the DMA buffer to be aligned correctly,
> so we would have to use some workarounds like below if the buffers
> allocated by the page_frag_alloc() are used by these hardwares for
> DMA.
>     buf = page_frag_alloc(really_needed_size + align);
>     buf = PTR_ALIGN(buf, align);
>
> These codes seems ugly and would waste a lot of memories if the buffers
> are used in a network driver for the TX/RX. So introduce
> page_frag_alloc_align() to make sure that an aligned buffer address is
> returned.
>
> Signed-off-by: Kevin Hao <haokexin@gmail.com>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> v2:
>   - Inline page_frag_alloc()
>   - Adopt Vlastimil's suggestion and add his Acked-by
>
>  include/linux/gfp.h | 12 ++++++++++--
>  mm/page_alloc.c     |  8 +++++---
>  2 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 6e479e9c48ce..39f4b3070d09 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -583,8 +583,16 @@ extern void free_pages(unsigned long addr, unsigned int order);
>
>  struct page_frag_cache;
>  extern void __page_frag_cache_drain(struct page *page, unsigned int count);
> -extern void *page_frag_alloc(struct page_frag_cache *nc,
> -                            unsigned int fragsz, gfp_t gfp_mask);
> +extern void *page_frag_alloc_align(struct page_frag_cache *nc,
> +                                  unsigned int fragsz, gfp_t gfp_mask,
> +                                  int align);
> +
> +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> +                            unsigned int fragsz, gfp_t gfp_mask)
> +{
> +       return page_frag_alloc_align(nc, fragsz, gfp_mask, 0);
> +}
> +
>  extern void page_frag_free(void *addr);
>
>  #define __free_page(page) __free_pages((page), 0)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 519a60d5b6f7..4667e7b6993b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5137,8 +5137,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
>  }
>  EXPORT_SYMBOL(__page_frag_cache_drain);
>
> -void *page_frag_alloc(struct page_frag_cache *nc,
> -                     unsigned int fragsz, gfp_t gfp_mask)
> +void *page_frag_alloc_align(struct page_frag_cache *nc,
> +                     unsigned int fragsz, gfp_t gfp_mask, int align)

I would make "align" unsigned since really we are using it as a mask.
Actually passing it as a mask might be even better. More on that
below.

>  {
>         unsigned int size = PAGE_SIZE;
>         struct page *page;
> @@ -5190,11 +5190,13 @@ void *page_frag_alloc(struct page_frag_cache *nc,
>         }
>
>         nc->pagecnt_bias--;
> +       if (align)
> +               offset = ALIGN_DOWN(offset, align);
>         nc->offset = offset;
>
>         return nc->va + offset;
>  }
> -EXPORT_SYMBOL(page_frag_alloc);
> +EXPORT_SYMBOL(page_frag_alloc_align);
>
>  /*
>   * Frees a page fragment allocated out of either a compound or order 0 page.

Rather than using the conditional branch it might be better to just do
"offset &= align_mask". Then you would be adding at most 1 instruction
which can likely occur in parallel with the other work that is going
on versus the conditional branch which requires a test, jump, and then
the 3 alignment instructions to do the subtraction, inversion, and
AND.

However it would ripple through the other patches as you would also
need to update you other patches to assume ~0 in the unaligned case,
however with your masked cases you could just use the negative
alignment value to generate your mask which would likely be taken care
of by the compiler.
Kevin Hao Feb. 4, 2021, 6:40 a.m. UTC | #5
On Tue, Feb 02, 2021 at 08:19:54AM -0800, Alexander Duyck wrote:
> On Sat, Jan 30, 2021 at 11:54 PM Kevin Hao <haokexin@gmail.com> wrote:
> >
> > In the current implementation of page_frag_alloc(), it doesn't have
> > any align guarantee for the returned buffer address. But for some
> > hardwares they do require the DMA buffer to be aligned correctly,
> > so we would have to use some workarounds like below if the buffers
> > allocated by the page_frag_alloc() are used by these hardwares for
> > DMA.
> >     buf = page_frag_alloc(really_needed_size + align);
> >     buf = PTR_ALIGN(buf, align);
> >
> > These codes seems ugly and would waste a lot of memories if the buffers
> > are used in a network driver for the TX/RX. So introduce
> > page_frag_alloc_align() to make sure that an aligned buffer address is
> > returned.
> >
> > Signed-off-by: Kevin Hao <haokexin@gmail.com>
> > Acked-by: Vlastimil Babka <vbabka@suse.cz>
> > ---
> > v2:
> >   - Inline page_frag_alloc()
> >   - Adopt Vlastimil's suggestion and add his Acked-by
> >
> >  include/linux/gfp.h | 12 ++++++++++--
> >  mm/page_alloc.c     |  8 +++++---
> >  2 files changed, 15 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> > index 6e479e9c48ce..39f4b3070d09 100644
> > --- a/include/linux/gfp.h
> > +++ b/include/linux/gfp.h
> > @@ -583,8 +583,16 @@ extern void free_pages(unsigned long addr, unsigned int order);
> >
> >  struct page_frag_cache;
> >  extern void __page_frag_cache_drain(struct page *page, unsigned int count);
> > -extern void *page_frag_alloc(struct page_frag_cache *nc,
> > -                            unsigned int fragsz, gfp_t gfp_mask);
> > +extern void *page_frag_alloc_align(struct page_frag_cache *nc,
> > +                                  unsigned int fragsz, gfp_t gfp_mask,
> > +                                  int align);
> > +
> > +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> > +                            unsigned int fragsz, gfp_t gfp_mask)
> > +{
> > +       return page_frag_alloc_align(nc, fragsz, gfp_mask, 0);
> > +}
> > +
> >  extern void page_frag_free(void *addr);
> >
> >  #define __free_page(page) __free_pages((page), 0)
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 519a60d5b6f7..4667e7b6993b 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -5137,8 +5137,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
> >  }
> >  EXPORT_SYMBOL(__page_frag_cache_drain);
> >
> > -void *page_frag_alloc(struct page_frag_cache *nc,
> > -                     unsigned int fragsz, gfp_t gfp_mask)
> > +void *page_frag_alloc_align(struct page_frag_cache *nc,
> > +                     unsigned int fragsz, gfp_t gfp_mask, int align)
> 
> I would make "align" unsigned since really we are using it as a mask.
> Actually passing it as a mask might be even better. More on that
> below.
> 
> >  {
> >         unsigned int size = PAGE_SIZE;
> >         struct page *page;
> > @@ -5190,11 +5190,13 @@ void *page_frag_alloc(struct page_frag_cache *nc,
> >         }
> >
> >         nc->pagecnt_bias--;
> > +       if (align)
> > +               offset = ALIGN_DOWN(offset, align);
> >         nc->offset = offset;
> >
> >         return nc->va + offset;
> >  }
> > -EXPORT_SYMBOL(page_frag_alloc);
> > +EXPORT_SYMBOL(page_frag_alloc_align);
> >
> >  /*
> >   * Frees a page fragment allocated out of either a compound or order 0 page.
> 
> Rather than using the conditional branch it might be better to just do
> "offset &= align_mask". Then you would be adding at most 1 instruction
> which can likely occur in parallel with the other work that is going
> on versus the conditional branch which requires a test, jump, and then
> the 3 alignment instructions to do the subtraction, inversion, and
> AND.

On arm64:

       if (align)
               offset = ALIGN_DOWN(offset, align);

	4b1503e2        neg     w2, w21
	710002bf        cmp     w21, #0x0
	0a020082        and     w2, w4, w2
	1a841044        csel    w4, w2, w4, ne  // ne = any


	offset &= align_mask

	0a0402a4        and     w4, w21, w4


Yes, we do cut 3 instructions by using align mask.

> 
> However it would ripple through the other patches as you would also
> need to update you other patches to assume ~0 in the unaligned case,
> however with your masked cases you could just use the negative
> alignment value to generate your mask which would likely be taken care
> of by the compiler.

Will do.

Thanks,
Kevin
diff mbox series

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6e479e9c48ce..39f4b3070d09 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -583,8 +583,16 @@  extern void free_pages(unsigned long addr, unsigned int order);
 
 struct page_frag_cache;
 extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-extern void *page_frag_alloc(struct page_frag_cache *nc,
-			     unsigned int fragsz, gfp_t gfp_mask);
+extern void *page_frag_alloc_align(struct page_frag_cache *nc,
+				   unsigned int fragsz, gfp_t gfp_mask,
+				   int align);
+
+static inline void *page_frag_alloc(struct page_frag_cache *nc,
+			     unsigned int fragsz, gfp_t gfp_mask)
+{
+	return page_frag_alloc_align(nc, fragsz, gfp_mask, 0);
+}
+
 extern void page_frag_free(void *addr);
 
 #define __free_page(page) __free_pages((page), 0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 519a60d5b6f7..4667e7b6993b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5137,8 +5137,8 @@  void __page_frag_cache_drain(struct page *page, unsigned int count)
 }
 EXPORT_SYMBOL(__page_frag_cache_drain);
 
-void *page_frag_alloc(struct page_frag_cache *nc,
-		      unsigned int fragsz, gfp_t gfp_mask)
+void *page_frag_alloc_align(struct page_frag_cache *nc,
+		      unsigned int fragsz, gfp_t gfp_mask, int align)
 {
 	unsigned int size = PAGE_SIZE;
 	struct page *page;
@@ -5190,11 +5190,13 @@  void *page_frag_alloc(struct page_frag_cache *nc,
 	}
 
 	nc->pagecnt_bias--;
+	if (align)
+		offset = ALIGN_DOWN(offset, align);
 	nc->offset = offset;
 
 	return nc->va + offset;
 }
-EXPORT_SYMBOL(page_frag_alloc);
+EXPORT_SYMBOL(page_frag_alloc_align);
 
 /*
  * Frees a page fragment allocated out of either a compound or order 0 page.