
[net-next,v2,2/4] net: Introduce {netdev,napi}_alloc_frag_align()

Message ID 20210131074426.44154-3-haokexin@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series net: Avoid the memory waste in some Ethernet drivers

Checks

Context Check Description
netdev/cover_letter success
netdev/fixes_present success
netdev/patch_count success
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success
netdev/cc_maintainers warning 14 maintainers not CCed: viro@zeniv.linux.org.uk pabeni@redhat.com kyk.segfault@gmail.com jakub@cloudflare.com jonathan.lemon@gmail.com dseok.yi@samsung.com willemb@google.com elver@google.com decui@microsoft.com pablo@netfilter.org rdunlap@infradead.org linmiaohe@huawei.com nogikh@google.com gnault@redhat.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 8823 this patch: 8823
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 87 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 9224 this patch: 9224
netdev/header_inline success
netdev/stable success Stable not CCed

Commit Message

Kevin Hao Jan. 31, 2021, 7:44 a.m. UTC
In the current implementation of {netdev,napi}_alloc_frag() there is
no alignment guarantee for the returned buffer address. But some
hardware does require the DMA buffer to be aligned, so when buffers
allocated by {netdev,napi}_alloc_frag() are used by such hardware for
DMA, we have to resort to a workaround like this:
    buf = napi_alloc_frag(really_needed_size + align);
    buf = PTR_ALIGN(buf, align);

This code is ugly and wastes a lot of memory when the buffers are used
by a network driver for TX/RX. We have already added alignment support
to the page_frag functions, so add the corresponding
{netdev,napi}_alloc_frag_align() functions.
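
With the new helper, the workaround above collapses to a single,
correctly aligned allocation (illustrative call; align is the required
power-of-2 alignment):
    buf = napi_alloc_frag_align(really_needed_size, align);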

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v2: Inline {netdev,napi}_alloc_frag().

 include/linux/skbuff.h | 22 ++++++++++++++++++++--
 net/core/skbuff.c      | 25 +++++++++----------------
 2 files changed, 29 insertions(+), 18 deletions(-)

Comments

Alexander Duyck Feb. 2, 2021, 4:26 p.m. UTC | #1
On Sun, Jan 31, 2021 at 12:17 AM Kevin Hao <haokexin@gmail.com> wrote:
>
> In the current implementation of {netdev,napi}_alloc_frag() there is
> no alignment guarantee for the returned buffer address. But some
> hardware does require the DMA buffer to be aligned, so when buffers
> allocated by {netdev,napi}_alloc_frag() are used by such hardware for
> DMA, we have to resort to a workaround like this:
>     buf = napi_alloc_frag(really_needed_size + align);
>     buf = PTR_ALIGN(buf, align);
>
> This code is ugly and wastes a lot of memory when the buffers are used
> by a network driver for TX/RX. We have already added alignment support
> to the page_frag functions, so add the corresponding
> {netdev,napi}_alloc_frag_align() functions.
>
> Signed-off-by: Kevin Hao <haokexin@gmail.com>
> ---
> v2: Inline {netdev,napi}_alloc_frag().
>
>  include/linux/skbuff.h | 22 ++++++++++++++++++++--
>  net/core/skbuff.c      | 25 +++++++++----------------
>  2 files changed, 29 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 9313b5aaf45b..7e8beff4ff22 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -2818,7 +2818,19 @@ void skb_queue_purge(struct sk_buff_head *list);
>
>  unsigned int skb_rbtree_purge(struct rb_root *root);
>
> -void *netdev_alloc_frag(unsigned int fragsz);
> +void *netdev_alloc_frag_align(unsigned int fragsz, int align);
> +
> +/**
> + * netdev_alloc_frag - allocate a page fragment
> + * @fragsz: fragment size
> + *
> + * Allocates a frag from a page for receive buffer.
> + * Uses GFP_ATOMIC allocations.
> + */
> +static inline void *netdev_alloc_frag(unsigned int fragsz)
> +{
> +       return netdev_alloc_frag_align(fragsz, 0);
> +}
>

So one thing we may want to do is actually split this up so that we
have a __netdev_alloc_frag_align function that is called by one of two
inline functions. The standard netdev_alloc_frag would be like what
you have here, except that it would pass ~0 for the mask.

The "align" version would take an unsigned int align value and convert
it to a mask. The idea is that the align value is likely a constant,
so the conversion to a mask is best done in an inline function, where
the compiler can do it at compile time.

An added benefit is that you could also validate the align value to
guarantee that it is a power of 2, so that the alignment mask
generation works as expected.
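
For reference, a minimal sketch of the suggested split (hypothetical
code, assuming __netdev_alloc_frag_align() and the underlying
page_frag allocator take an alignment mask; for a power-of-2 align the
mask is simply -align, i.e. ~(align - 1)):

	void *__netdev_alloc_frag_align(unsigned int fragsz,
					unsigned int align_mask);

	/* Standard variant: a ~0 mask imposes no extra alignment. */
	static inline void *netdev_alloc_frag(unsigned int fragsz)
	{
		return __netdev_alloc_frag_align(fragsz, ~0u);
	}

	/* Aligned variant: validate align and let the compiler turn a
	 * constant align into a mask at compile time.
	 */
	static inline void *netdev_alloc_frag_align(unsigned int fragsz,
						    unsigned int align)
	{
		WARN_ON_ONCE(!is_power_of_2(align));
		return __netdev_alloc_frag_align(fragsz, -align);
	}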

>  struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
>                                    gfp_t gfp_mask);
> @@ -2877,7 +2889,13 @@ static inline void skb_free_frag(void *addr)
>         page_frag_free(addr);
>  }
>
> -void *napi_alloc_frag(unsigned int fragsz);
> +void *napi_alloc_frag_align(unsigned int fragsz, int align);
> +
> +static inline void *napi_alloc_frag(unsigned int fragsz)
> +{
> +       return napi_alloc_frag_align(fragsz, 0);
> +}
> +
>  struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
>                                  unsigned int length, gfp_t gfp_mask);
>  static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,

Same for the __napi_alloc_frag code. You could probably convert the
__napi_alloc_frag below into an __napi_alloc_frag_align that you pass
a mask to. Then you could convert the other two functions to either
pass ~0 or the align value and add align value validation.
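
Along the same lines, the napi side could become (again just a sketch
under the same mask-based assumption):

	/* out of line in net/core/skbuff.c, exported */
	void *__napi_alloc_frag_align(unsigned int fragsz,
				      unsigned int align_mask)
	{
		struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

		fragsz = SKB_DATA_ALIGN(fragsz);
		return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
					     align_mask);
	}
	EXPORT_SYMBOL(__napi_alloc_frag_align);

	/* inline wrappers in include/linux/skbuff.h */
	static inline void *napi_alloc_frag(unsigned int fragsz)
	{
		return __napi_alloc_frag_align(fragsz, ~0u);
	}

	static inline void *napi_alloc_frag_align(unsigned int fragsz,
						  unsigned int align)
	{
		WARN_ON_ONCE(!is_power_of_2(align));
		return __napi_alloc_frag_align(fragsz, -align);
	}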

> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 2af12f7e170c..a35e75f12428 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -374,29 +374,22 @@ struct napi_alloc_cache {
>  static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
>  static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
>
> -static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
> +static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask, int align)
>  {
>         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
>
> -       return page_frag_alloc(&nc->page, fragsz, gfp_mask);
> +       return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align);
>  }
>
> -void *napi_alloc_frag(unsigned int fragsz)
> +void *napi_alloc_frag_align(unsigned int fragsz, int align)
>  {
>         fragsz = SKB_DATA_ALIGN(fragsz);
>
> -       return __napi_alloc_frag(fragsz, GFP_ATOMIC);
> +       return __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
>  }
> -EXPORT_SYMBOL(napi_alloc_frag);
> +EXPORT_SYMBOL(napi_alloc_frag_align);
>
> -/**
> - * netdev_alloc_frag - allocate a page fragment
> - * @fragsz: fragment size
> - *
> - * Allocates a frag from a page for receive buffer.
> - * Uses GFP_ATOMIC allocations.
> - */
> -void *netdev_alloc_frag(unsigned int fragsz)
> +void *netdev_alloc_frag_align(unsigned int fragsz, int align)
>  {
>         struct page_frag_cache *nc;
>         void *data;
> @@ -404,15 +397,15 @@ void *netdev_alloc_frag(unsigned int fragsz)
>         fragsz = SKB_DATA_ALIGN(fragsz);
>         if (in_irq() || irqs_disabled()) {
>                 nc = this_cpu_ptr(&netdev_alloc_cache);
> -               data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
> +               data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align);
>         } else {
>                 local_bh_disable();
> -               data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
> +               data = __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
>                 local_bh_enable();
>         }
>         return data;
>  }
> -EXPORT_SYMBOL(netdev_alloc_frag);
> +EXPORT_SYMBOL(netdev_alloc_frag_align);
>
>  /**
>   *     __netdev_alloc_skb - allocate an skbuff for rx on a specific device
> --
> 2.29.2
>
Kevin Hao Feb. 4, 2021, 6:47 a.m. UTC | #2
On Tue, Feb 02, 2021 at 08:26:19AM -0800, Alexander Duyck wrote:
> On Sun, Jan 31, 2021 at 12:17 AM Kevin Hao <haokexin@gmail.com> wrote:
> >
> > In the current implementation of {netdev,napi}_alloc_frag() there is
> > no alignment guarantee for the returned buffer address. But some
> > hardware does require the DMA buffer to be aligned, so when buffers
> > allocated by {netdev,napi}_alloc_frag() are used by such hardware for
> > DMA, we have to resort to a workaround like this:
> >     buf = napi_alloc_frag(really_needed_size + align);
> >     buf = PTR_ALIGN(buf, align);
> >
> > This code is ugly and wastes a lot of memory when the buffers are used
> > by a network driver for TX/RX. We have already added alignment support
> > to the page_frag functions, so add the corresponding
> > {netdev,napi}_alloc_frag_align() functions.
> >
> > Signed-off-by: Kevin Hao <haokexin@gmail.com>
> > ---
> > v2: Inline {netdev,napi}_alloc_frag().
> >
> >  include/linux/skbuff.h | 22 ++++++++++++++++++++--
> >  net/core/skbuff.c      | 25 +++++++++----------------
> >  2 files changed, 29 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 9313b5aaf45b..7e8beff4ff22 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -2818,7 +2818,19 @@ void skb_queue_purge(struct sk_buff_head *list);
> >
> >  unsigned int skb_rbtree_purge(struct rb_root *root);
> >
> > -void *netdev_alloc_frag(unsigned int fragsz);
> > +void *netdev_alloc_frag_align(unsigned int fragsz, int align);
> > +
> > +/**
> > + * netdev_alloc_frag - allocate a page fragment
> > + * @fragsz: fragment size
> > + *
> > + * Allocates a frag from a page for receive buffer.
> > + * Uses GFP_ATOMIC allocations.
> > + */
> > +static inline void *netdev_alloc_frag(unsigned int fragsz)
> > +{
> > +       return netdev_alloc_frag_align(fragsz, 0);
> > +}
> >
> 
> So one thing we may want to do is actually split this up so that we
> have a __netdev_alloc_frag_align function that is called by one of two
> inline functions. The standard netdev_alloc_frag would be like what
> you have here, except that it would pass ~0 for the mask.
> 
> The "align" version would take an unsigned int align value and convert
> it to a mask. The idea is that the align value is likely a constant,
> so the conversion to a mask is best done in an inline function, where
> the compiler can do it at compile time.
> 
> An added benefit is that you could also validate the align value to
> guarantee that it is a power of 2, so that the alignment mask
> generation works as expected.

Fair enough. Thanks Alexander.

Kevin

> 
> >  struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
> >                                    gfp_t gfp_mask);
> > @@ -2877,7 +2889,13 @@ static inline void skb_free_frag(void *addr)
> >         page_frag_free(addr);
> >  }
> >
> > -void *napi_alloc_frag(unsigned int fragsz);
> > +void *napi_alloc_frag_align(unsigned int fragsz, int align);
> > +
> > +static inline void *napi_alloc_frag(unsigned int fragsz)
> > +{
> > +       return napi_alloc_frag_align(fragsz, 0);
> > +}
> > +
> >  struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
> >                                  unsigned int length, gfp_t gfp_mask);
> >  static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
> 
> Same for the __napi_alloc_frag code. You could probably convert the
> __napi_alloc_frag below into an __napi_alloc_frag_align that you pass
> a mask to. Then you could convert the other two functions to either
> pass ~0 or the align value and add align value validation.
> 
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 2af12f7e170c..a35e75f12428 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -374,29 +374,22 @@ struct napi_alloc_cache {
> >  static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
> >  static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
> >
> > -static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
> > +static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask, int align)
> >  {
> >         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
> >
> > -       return page_frag_alloc(&nc->page, fragsz, gfp_mask);
> > +       return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align);
> >  }
> >
> > -void *napi_alloc_frag(unsigned int fragsz)
> > +void *napi_alloc_frag_align(unsigned int fragsz, int align)
> >  {
> >         fragsz = SKB_DATA_ALIGN(fragsz);
> >
> > -       return __napi_alloc_frag(fragsz, GFP_ATOMIC);
> > +       return __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
> >  }
> > -EXPORT_SYMBOL(napi_alloc_frag);
> > +EXPORT_SYMBOL(napi_alloc_frag_align);
> >
> > -/**
> > - * netdev_alloc_frag - allocate a page fragment
> > - * @fragsz: fragment size
> > - *
> > - * Allocates a frag from a page for receive buffer.
> > - * Uses GFP_ATOMIC allocations.
> > - */
> > -void *netdev_alloc_frag(unsigned int fragsz)
> > +void *netdev_alloc_frag_align(unsigned int fragsz, int align)
> >  {
> >         struct page_frag_cache *nc;
> >         void *data;
> > @@ -404,15 +397,15 @@ void *netdev_alloc_frag(unsigned int fragsz)
> >         fragsz = SKB_DATA_ALIGN(fragsz);
> >         if (in_irq() || irqs_disabled()) {
> >                 nc = this_cpu_ptr(&netdev_alloc_cache);
> > -               data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
> > +               data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align);
> >         } else {
> >                 local_bh_disable();
> > -               data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
> > +               data = __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
> >                 local_bh_enable();
> >         }
> >         return data;
> >  }
> > -EXPORT_SYMBOL(netdev_alloc_frag);
> > +EXPORT_SYMBOL(netdev_alloc_frag_align);
> >
> >  /**
> >   *     __netdev_alloc_skb - allocate an skbuff for rx on a specific device
> > --
> > 2.29.2
> >

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9313b5aaf45b..7e8beff4ff22 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2818,7 +2818,19 @@ void skb_queue_purge(struct sk_buff_head *list);
 
 unsigned int skb_rbtree_purge(struct rb_root *root);
 
-void *netdev_alloc_frag(unsigned int fragsz);
+void *netdev_alloc_frag_align(unsigned int fragsz, int align);
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+static inline void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return netdev_alloc_frag_align(fragsz, 0);
+}
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
 				   gfp_t gfp_mask);
@@ -2877,7 +2889,13 @@ static inline void skb_free_frag(void *addr)
 	page_frag_free(addr);
 }
 
-void *napi_alloc_frag(unsigned int fragsz);
+void *napi_alloc_frag_align(unsigned int fragsz, int align);
+
+static inline void *napi_alloc_frag(unsigned int fragsz)
+{
+	return napi_alloc_frag_align(fragsz, 0);
+}
+
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
 				 unsigned int length, gfp_t gfp_mask);
 static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2af12f7e170c..a35e75f12428 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -374,29 +374,22 @@ struct napi_alloc_cache {
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask, int align)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
+	return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align);
 }
 
-void *napi_alloc_frag(unsigned int fragsz)
+void *napi_alloc_frag_align(unsigned int fragsz, int align)
 {
 	fragsz = SKB_DATA_ALIGN(fragsz);
 
-	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
 }
-EXPORT_SYMBOL(napi_alloc_frag);
+EXPORT_SYMBOL(napi_alloc_frag_align);
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+void *netdev_alloc_frag_align(unsigned int fragsz, int align)
 {
 	struct page_frag_cache *nc;
 	void *data;
@@ -404,15 +397,15 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	fragsz = SKB_DATA_ALIGN(fragsz);
 	if (in_irq() || irqs_disabled()) {
 		nc = this_cpu_ptr(&netdev_alloc_cache);
-		data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+		data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align);
 	} else {
 		local_bh_disable();
-		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+		data = __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
 		local_bh_enable();
 	}
 	return data;
 }
-EXPORT_SYMBOL(netdev_alloc_frag);
+EXPORT_SYMBOL(netdev_alloc_frag_align);
 
 /**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device