diff mbox series

[net-next,3/5] virtio_net: Add page pool fragmentation support

Message ID 20230526054621.18371-3-liangchen.linux@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,1/5] virtio_net: Fix an unsafe reference to the page chain | expand

Checks

Context Check Description
netdev/series_format warning Series does not have a cover letter
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 5 maintainers not CCed: hawk@kernel.org daniel@iogearbox.net john.fastabend@gmail.com bpf@vger.kernel.org ast@kernel.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api fail Found: 'module_param' was: 0 now: 1
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Liang Chen May 26, 2023, 5:46 a.m. UTC
To further enhance performance, implement page pool fragmentation
support and introduce a module parameter to enable or disable it.

In single-core vm testing environments, there is an additional performance
gain observed in the normal path compared to the one packet per page
approach.
  Upstream codebase: 47.5 Gbits/sec
  Upstream codebase with page pool: 50.2 Gbits/sec
  Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec

There is also some performance gain for XDP cpumap.
  Upstream codebase: 1.38 Gbits/sec
  Upstream codebase with page pool: 9.74 Gbits/sec
  Upstream codebase with page pool fragmentation: 10.3 Gbits/sec

Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
---
 drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 17 deletions(-)

Comments

Horatiu Vultur May 26, 2023, 8:29 a.m. UTC | #1
The 05/26/2023 13:46, Liang Chen wrote:

Hi Liang,

> 
> To further enhance performance, implement page pool fragmentation
> support and introduce a module parameter to enable or disable it.
> 
> In single-core vm testing environments, there is an additional performance
> gain observed in the normal path compared to the one packet per page
> approach.
>   Upstream codebase: 47.5 Gbits/sec
>   Upstream codebase with page pool: 50.2 Gbits/sec
>   Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec
> 
> There is also some performance gain for XDP cpumap.
>   Upstream codebase: 1.38 Gbits/sec
>   Upstream codebase with page pool: 9.74 Gbits/sec
>   Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
> 
> Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
> ---
>  drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
>  1 file changed, 55 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 99c0ca0c1781..ac40b8c66c59 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
>  module_param(napi_tx, bool, 0644);
> 
>  static bool page_pool_enabled;
> +static bool page_pool_frag;
>  module_param(page_pool_enabled, bool, 0400);
> +module_param(page_pool_frag, bool, 0400);
> 
>  /* FIXME: MTU in config. */
>  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
> @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>                                        struct page *p,
>                                        int offset,
>                                        int page_off,
> -                                      unsigned int *len)
> +                                      unsigned int *len,
> +                                          unsigned int *pp_frag_offset)

The 'unsigned int *pp_frag_offset' parameter seems to be misaligned.

>  {
>         int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>         struct page *page;
> +       unsigned int pp_frag_offset_val;

Please use reverse Christmas tree notation here: pp_frag_offset_val
needs to be declared before page.

> 
>         if (page_off + *len + tailroom > PAGE_SIZE)
>                 return NULL;
> 
>         if (rq->page_pool)
> -               page = page_pool_dev_alloc_pages(rq->page_pool);
> +               if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> +                       page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
> +                                                       PAGE_SIZE);

Don't you need to check whether pp_frag_offset is NULL? You call this
function once with NULL.

> +               else
> +                       page = page_pool_dev_alloc_pages(rq->page_pool);
>         else
>                 page = alloc_page(GFP_ATOMIC);
> 
>         if (!page)
>                 return NULL;
> 
> -       memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> +       pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
> +
> +       memcpy(page_address(page) + page_off + pp_frag_offset_val,
> +              page_address(p) + offset, *len);
>         page_off += *len;
> 
>         while (--*num_buf) {
> @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>                         goto err_buf;
>                 }
> 
> -               memcpy(page_address(page) + page_off,
> +               memcpy(page_address(page) + page_off + pp_frag_offset_val,
>                        page_address(p) + off, buflen);
>                 page_off += buflen;
>                 virtnet_put_page(rq, p);
> @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
>                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>                 xdp_page = xdp_linearize_page(rq, &num_buf, page,
>                                               offset, header_offset,
> -                                             &tlen);
> +                                             &tlen, NULL);
>                 if (!xdp_page)
>                         goto err_xdp;
> 
> @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>         struct page *xdp_page;
>         unsigned int xdp_room;
> +       unsigned int page_frag_offset = 0;

Please use reverse x-mas tree notation.

> 
>         /* Transient failure which in theory could occur if
>          * in-flight packets from before XDP was enabled reach
> @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>                 xdp_page = xdp_linearize_page(rq, num_buf,
>                                               *page, offset,
>                                               VIRTIO_XDP_HEADROOM,
> -                                             len);
> +                                             len,
> +                                                 &page_frag_offset);

You also have some misalignment here with regard to page_frag_offset.

>                 if (!xdp_page)
>                         return NULL;
>         } else {
> @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>                         return NULL;
> 
>                 if (rq->page_pool)
> -                       xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> +                       if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> +                               xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
> +                                                                   &page_frag_offset, PAGE_SIZE);
> +                       else
> +                               xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
>                 else
>                         xdp_page = alloc_page(GFP_ATOMIC);
> +
>                 if (!xdp_page)
>                         return NULL;
> 
> -               memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> -                      page_address(*page) + offset, *len);
> +               memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
> +                               page_frag_offset, page_address(*page) + offset, *len);
>         }
> 
>         *frame_sz = PAGE_SIZE;
> @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> 
>         *page = xdp_page;
> 
> -       return page_address(*page) + VIRTIO_XDP_HEADROOM;
> +       return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
>  }
> 
>  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>         void *ctx;
>         int err;
>         unsigned int len, hole;
> +       unsigned int pp_frag_offset;

The same here.

> 
>         /* Extra tailroom is needed to satisfy XDP's assumption. This
>          * means rx frags coalescing won't work, but consider we've
> @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>          */
>         len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
>         if (rq->page_pool) {
> -               struct page *page;
> +               if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> +                       if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> +                                                              &pp_frag_offset, len + room)))
> +                               return -ENOMEM;
> +                       buf = (char *)page_address(rq->page_pool->frag_page) +
> +                               pp_frag_offset;
> +                       buf += headroom; /* advance address leaving hole at front of pkt */
> +                       hole = (PAGE_SIZE << rq->page_pool->p.order)
> +                               - rq->page_pool->frag_offset;
> +                       if (hole < len + room) {
> +                               if (!headroom)
> +                                       len += hole;
> +                               rq->page_pool->frag_offset += hole;
> +                       }
> +               } else {
> +                       struct page *page;
> 
> -               page = page_pool_dev_alloc_pages(rq->page_pool);
> -               if (unlikely(!page))
> -                       return -ENOMEM;
> -               buf = (char *)page_address(page);
> -               buf += headroom; /* advance address leaving hole at front of pkt */
> +                       page = page_pool_dev_alloc_pages(rq->page_pool);
> +                       if (unlikely(!page))
> +                               return -ENOMEM;
> +                       buf = (char *)page_address(page);
> +                       buf += headroom; /* advance address leaving hole at front of pkt */
> +               }
>         } else {
>                 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
>                         return -ENOMEM;
> @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
>         struct virtio_device *vdev = rq->vq->vdev;
> 
>         struct page_pool_params pp_params = {
> -               .order = 0,
> +               .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
>                 .pool_size = rq->vq->num_max,
>                 .nid = dev_to_node(vdev->dev.parent),
>                 .dev = vdev->dev.parent,
>                 .offset = 0,
>         };
> 
> +       if (page_pool_frag)
> +               pp_params.flags |= PP_FLAG_PAGE_FRAG;
> +
>         rq->page_pool = page_pool_create(&pp_params);
>         if (IS_ERR(rq->page_pool)) {
>                 dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> --
> 2.31.1
> 
>
kernel test robot May 26, 2023, 5:44 p.m. UTC | #2
Hi Liang,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/virtio_net-Add-page_pool-support-to-improve-performance/20230526-135805
base:   net-next/main
patch link:    https://lore.kernel.org/r/20230526054621.18371-3-liangchen.linux%40gmail.com
patch subject: [PATCH net-next 3/5] virtio_net: Add page pool fragmentation support
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230527/202305270116.TJ31IjNL-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/dda0469e059354b61192e1d25b77c57351346282
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Liang-Chen/virtio_net-Add-page_pool-support-to-improve-performance/20230526-135805
        git checkout dda0469e059354b61192e1d25b77c57351346282
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 olddefconfig
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305270116.TJ31IjNL-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: vmlinux.o: in function `virtnet_find_vqs':
   virtio_net.c:(.text+0x901fd2): undefined reference to `page_pool_create'
   ld: vmlinux.o: in function `add_recvbuf_mergeable.isra.0':
   virtio_net.c:(.text+0x905662): undefined reference to `page_pool_alloc_pages'
>> ld: virtio_net.c:(.text+0x905715): undefined reference to `page_pool_alloc_frag'
   ld: vmlinux.o: in function `xdp_linearize_page':
   virtio_net.c:(.text+0x906c50): undefined reference to `page_pool_alloc_pages'
   ld: virtio_net.c:(.text+0x906e33): undefined reference to `page_pool_alloc_frag'
   ld: vmlinux.o: in function `mergeable_xdp_get_buf.isra.0':
>> virtio_net.c:(.text+0x90740e): undefined reference to `page_pool_alloc_frag'
>> ld: virtio_net.c:(.text+0x90750b): undefined reference to `page_pool_alloc_pages'
Liang Chen May 27, 2023, 12:36 p.m. UTC | #3
On Fri, May 26, 2023 at 4:29 PM Horatiu Vultur
<horatiu.vultur@microchip.com> wrote:
>
> The 05/26/2023 13:46, Liang Chen wrote:
>
> Hi Liang,
>
> >
> > To further enhance performance, implement page pool fragmentation
> > support and introduce a module parameter to enable or disable it.
> >
> > In single-core vm testing environments, there is an additional performance
> > gain observed in the normal path compared to the one packet per page
> > approach.
> >   Upstream codebase: 47.5 Gbits/sec
> >   Upstream codebase with page pool: 50.2 Gbits/sec
> >   Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec
> >
> > There is also some performance gain for XDP cpumap.
> >   Upstream codebase: 1.38 Gbits/sec
> >   Upstream codebase with page pool: 9.74 Gbits/sec
> >   Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
> >
> > Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
> > ---
> >  drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
> >  1 file changed, 55 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 99c0ca0c1781..ac40b8c66c59 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
> >  module_param(napi_tx, bool, 0644);
> >
> >  static bool page_pool_enabled;
> > +static bool page_pool_frag;
> >  module_param(page_pool_enabled, bool, 0400);
> > +module_param(page_pool_frag, bool, 0400);
> >
> >  /* FIXME: MTU in config. */
> >  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
> > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> >                                        struct page *p,
> >                                        int offset,
> >                                        int page_off,
> > -                                      unsigned int *len)
> > +                                      unsigned int *len,
> > +                                          unsigned int *pp_frag_offset)
>
> The 'unsigned int *pp_frag_offset' seems to be unaligned.
>

Sure, Thanks!
> >  {
> >         int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> >         struct page *page;
> > +       unsigned int pp_frag_offset_val;
>
> Please use reverse christmas tree notation here. The pp_frag_offset_val
> needs to be declared before page;
>

Sure. Will do on v2.
> >
> >         if (page_off + *len + tailroom > PAGE_SIZE)
> >                 return NULL;
> >
> >         if (rq->page_pool)
> > -               page = page_pool_dev_alloc_pages(rq->page_pool);
> > +               if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > +                       page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
> > +                                                       PAGE_SIZE);
>
> Don't you need to check if pp_frag_offset is null? As you call once with
> NULL.
>

At the moment, page_pool is enabled only for mergeable mode, and the
path leading to a call with a NULL pp_frag_offset comes from small mode.
But I will re-evaluate for v2 whether it is beneficial to support
page_pool for small mode. Thanks.
> > +               else
> > +                       page = page_pool_dev_alloc_pages(rq->page_pool);
> >         else
> >                 page = alloc_page(GFP_ATOMIC);
> >
> >         if (!page)
> >                 return NULL;
> >
> > -       memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> > +       pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
> > +
> > +       memcpy(page_address(page) + page_off + pp_frag_offset_val,
> > +              page_address(p) + offset, *len);
> >         page_off += *len;
> >
> >         while (--*num_buf) {
> > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> >                         goto err_buf;
> >                 }
> >
> > -               memcpy(page_address(page) + page_off,
> > +               memcpy(page_address(page) + page_off + pp_frag_offset_val,
> >                        page_address(p) + off, buflen);
> >                 page_off += buflen;
> >                 virtnet_put_page(rq, p);
> > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> >                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> >                 xdp_page = xdp_linearize_page(rq, &num_buf, page,
> >                                               offset, header_offset,
> > -                                             &tlen);
> > +                                             &tlen, NULL);
> >                 if (!xdp_page)
> >                         goto err_xdp;
> >
> > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> >         struct page *xdp_page;
> >         unsigned int xdp_room;
> > +       unsigned int page_frag_offset = 0;
>
> Please use reverse x-mas tree notation.
>

Sure. Will do on v2.
> >
> >         /* Transient failure which in theory could occur if
> >          * in-flight packets from before XDP was enabled reach
> > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >                 xdp_page = xdp_linearize_page(rq, num_buf,
> >                                               *page, offset,
> >                                               VIRTIO_XDP_HEADROOM,
> > -                                             len);
> > +                                             len,
> > +                                                 &page_frag_offset);
>
> You have also here some misalignment with regards to page_frag_offset.
>

Sure, Thanks!
> >                 if (!xdp_page)
> >                         return NULL;
> >         } else {
> > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >                         return NULL;
> >
> >                 if (rq->page_pool)
> > -                       xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> > +                       if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > +                               xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
> > +                                                                   &page_frag_offset, PAGE_SIZE);
> > +                       else
> > +                               xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> >                 else
> >                         xdp_page = alloc_page(GFP_ATOMIC);
> > +
> >                 if (!xdp_page)
> >                         return NULL;
> >
> > -               memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> > -                      page_address(*page) + offset, *len);
> > +               memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
> > +                               page_frag_offset, page_address(*page) + offset, *len);
> >         }
> >
> >         *frame_sz = PAGE_SIZE;
> > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >
> >         *page = xdp_page;
> >
> > -       return page_address(*page) + VIRTIO_XDP_HEADROOM;
> > +       return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
> >  }
> >
> >  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> >         void *ctx;
> >         int err;
> >         unsigned int len, hole;
> > +       unsigned int pp_frag_offset;
>
> There same here.
>

Sure, Thanks!

> >
> >         /* Extra tailroom is needed to satisfy XDP's assumption. This
> >          * means rx frags coalescing won't work, but consider we've
> > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> >          */
> >         len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> >         if (rq->page_pool) {
> > -               struct page *page;
> > +               if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> > +                       if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> > +                                                              &pp_frag_offset, len + room)))
> > +                               return -ENOMEM;
> > +                       buf = (char *)page_address(rq->page_pool->frag_page) +
> > +                               pp_frag_offset;
> > +                       buf += headroom; /* advance address leaving hole at front of pkt */
> > +                       hole = (PAGE_SIZE << rq->page_pool->p.order)
> > +                               - rq->page_pool->frag_offset;
> > +                       if (hole < len + room) {
> > +                               if (!headroom)
> > +                                       len += hole;
> > +                               rq->page_pool->frag_offset += hole;
> > +                       }
> > +               } else {
> > +                       struct page *page;
> >
> > -               page = page_pool_dev_alloc_pages(rq->page_pool);
> > -               if (unlikely(!page))
> > -                       return -ENOMEM;
> > -               buf = (char *)page_address(page);
> > -               buf += headroom; /* advance address leaving hole at front of pkt */
> > +                       page = page_pool_dev_alloc_pages(rq->page_pool);
> > +                       if (unlikely(!page))
> > +                               return -ENOMEM;
> > +                       buf = (char *)page_address(page);
> > +                       buf += headroom; /* advance address leaving hole at front of pkt */
> > +               }
> >         } else {
> >                 if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
> >                         return -ENOMEM;
> > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
> >         struct virtio_device *vdev = rq->vq->vdev;
> >
> >         struct page_pool_params pp_params = {
> > -               .order = 0,
> > +               .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
> >                 .pool_size = rq->vq->num_max,
> >                 .nid = dev_to_node(vdev->dev.parent),
> >                 .dev = vdev->dev.parent,
> >                 .offset = 0,
> >         };
> >
> > +       if (page_pool_frag)
> > +               pp_params.flags |= PP_FLAG_PAGE_FRAG;
> > +
> >         rq->page_pool = page_pool_create(&pp_params);
> >         if (IS_ERR(rq->page_pool)) {
> >                 dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> > --
> > 2.31.1
> >
> >
>
> --
> /Horatiu
Michael S. Tsirkin May 28, 2023, 6:25 a.m. UTC | #4
On Fri, May 26, 2023 at 01:46:19PM +0800, Liang Chen wrote:
> To further enhance performance, implement page pool fragmentation
> support and introduce a module parameter to enable or disable it.
> 
> In single-core vm testing environments, there is an additional performance
> gain observed in the normal path compared to the one packet per page
> approach.
>   Upstream codebase: 47.5 Gbits/sec
>   Upstream codebase with page pool: 50.2 Gbits/sec
>   Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec
> 
> There is also some performance gain for XDP cpumap.
>   Upstream codebase: 1.38 Gbits/sec
>   Upstream codebase with page pool: 9.74 Gbits/sec
>   Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
> 
> Signed-off-by: Liang Chen <liangchen.linux@gmail.com>

I think it's called fragmenting, not fragmentation.


> ---
>  drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
>  1 file changed, 55 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 99c0ca0c1781..ac40b8c66c59 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
>  module_param(napi_tx, bool, 0644);
>  
>  static bool page_pool_enabled;
> +static bool page_pool_frag;
>  module_param(page_pool_enabled, bool, 0400);
> +module_param(page_pool_frag, bool, 0400);
>  
>  /* FIXME: MTU in config. */
>  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)

So here again, the same questions:

- When is this a net perf gain, and when does it have no effect?
- Can it be on by default?
- Can we get rid of the extra modes?


> @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>  				       struct page *p,
>  				       int offset,
>  				       int page_off,
> -				       unsigned int *len)
> +				       unsigned int *len,
> +					   unsigned int *pp_frag_offset)
>  {
>  	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>  	struct page *page;
> +	unsigned int pp_frag_offset_val;
>  
>  	if (page_off + *len + tailroom > PAGE_SIZE)
>  		return NULL;
>  
>  	if (rq->page_pool)
> -		page = page_pool_dev_alloc_pages(rq->page_pool);
> +		if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> +			page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
> +							PAGE_SIZE);
> +		else
> +			page = page_pool_dev_alloc_pages(rq->page_pool);
>  	else
>  		page = alloc_page(GFP_ATOMIC);
>  
>  	if (!page)
>  		return NULL;
>  
> -	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> +	pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
> +
> +	memcpy(page_address(page) + page_off + pp_frag_offset_val,
> +	       page_address(p) + offset, *len);
>  	page_off += *len;
>  
>  	while (--*num_buf) {
> @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>  			goto err_buf;
>  		}
>  
> -		memcpy(page_address(page) + page_off,
> +		memcpy(page_address(page) + page_off + pp_frag_offset_val,
>  		       page_address(p) + off, buflen);
>  		page_off += buflen;
>  		virtnet_put_page(rq, p);
> @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
>  			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>  		xdp_page = xdp_linearize_page(rq, &num_buf, page,
>  					      offset, header_offset,
> -					      &tlen);
> +					      &tlen, NULL);
>  		if (!xdp_page)
>  			goto err_xdp;
>  
> @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>  	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>  	struct page *xdp_page;
>  	unsigned int xdp_room;
> +	unsigned int page_frag_offset = 0;
>  
>  	/* Transient failure which in theory could occur if
>  	 * in-flight packets from before XDP was enabled reach
> @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>  		xdp_page = xdp_linearize_page(rq, num_buf,
>  					      *page, offset,
>  					      VIRTIO_XDP_HEADROOM,
> -					      len);
> +					      len,
> +						  &page_frag_offset);
>  		if (!xdp_page)
>  			return NULL;
>  	} else {
> @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>  			return NULL;
>  
>  		if (rq->page_pool)
> -			xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> +			if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> +				xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
> +								    &page_frag_offset, PAGE_SIZE);
> +			else
> +				xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
>  		else
>  			xdp_page = alloc_page(GFP_ATOMIC);
> +
>  		if (!xdp_page)
>  			return NULL;
>  
> -		memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> -		       page_address(*page) + offset, *len);
> +		memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
> +				page_frag_offset, page_address(*page) + offset, *len);
>  	}
>  
>  	*frame_sz = PAGE_SIZE;
> @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
>  
>  	*page = xdp_page;
>  
> -	return page_address(*page) + VIRTIO_XDP_HEADROOM;
> +	return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
>  }
>  
>  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>  	void *ctx;
>  	int err;
>  	unsigned int len, hole;
> +	unsigned int pp_frag_offset;
>  
>  	/* Extra tailroom is needed to satisfy XDP's assumption. This
>  	 * means rx frags coalescing won't work, but consider we've
> @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>  	 */
>  	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
>  	if (rq->page_pool) {
> -		struct page *page;
> +		if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> +			if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> +							       &pp_frag_offset, len + room)))
> +				return -ENOMEM;
> +			buf = (char *)page_address(rq->page_pool->frag_page) +
> +				pp_frag_offset;
> +			buf += headroom; /* advance address leaving hole at front of pkt */
> +			hole = (PAGE_SIZE << rq->page_pool->p.order)
> +				- rq->page_pool->frag_offset;
> +			if (hole < len + room) {
> +				if (!headroom)
> +					len += hole;
> +				rq->page_pool->frag_offset += hole;
> +			}
> +		} else {
> +			struct page *page;
>  
> -		page = page_pool_dev_alloc_pages(rq->page_pool);
> -		if (unlikely(!page))
> -			return -ENOMEM;
> -		buf = (char *)page_address(page);
> -		buf += headroom; /* advance address leaving hole at front of pkt */
> +			page = page_pool_dev_alloc_pages(rq->page_pool);
> +			if (unlikely(!page))
> +				return -ENOMEM;
> +			buf = (char *)page_address(page);
> +			buf += headroom; /* advance address leaving hole at front of pkt */
> +		}
>  	} else {
>  		if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
>  			return -ENOMEM;
> @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
>  	struct virtio_device *vdev = rq->vq->vdev;
>  
>  	struct page_pool_params pp_params = {
> -		.order = 0,
> +		.order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
>  		.pool_size = rq->vq->num_max,
>  		.nid = dev_to_node(vdev->dev.parent),
>  		.dev = vdev->dev.parent,
>  		.offset = 0,
>  	};
>  
> +	if (page_pool_frag)
> +		pp_params.flags |= PP_FLAG_PAGE_FRAG;
> +
>  	rq->page_pool = page_pool_create(&pp_params);
>  	if (IS_ERR(rq->page_pool)) {
>  		dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> -- 
> 2.31.1
Yunsheng Lin May 29, 2023, 1:33 a.m. UTC | #5
On 2023/5/26 13:46, Liang Chen wrote:

...

> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 99c0ca0c1781..ac40b8c66c59 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
>  module_param(napi_tx, bool, 0644);
>  
>  static bool page_pool_enabled;
> +static bool page_pool_frag;
>  module_param(page_pool_enabled, bool, 0400);
> +module_param(page_pool_frag, bool, 0400);

The patchset below unifies frag and non-frag pages for the
page_pool_alloc_frag() API; perhaps it would simplify the
driver's support of page pool.

https://patchwork.kernel.org/project/netdevbpf/cover/20230526092616.40355-1-linyunsheng@huawei.com/

>  

...

> @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>  	 */
>  	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
>  	if (rq->page_pool) {
> -		struct page *page;
> +		if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> +			if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> +							       &pp_frag_offset, len + room)))
> +				return -ENOMEM;
> +			buf = (char *)page_address(rq->page_pool->frag_page) +
> +				pp_frag_offset;
> +			buf += headroom; /* advance address leaving hole at front of pkt */
> +			hole = (PAGE_SIZE << rq->page_pool->p.order)
> +				- rq->page_pool->frag_offset;
> +			if (hole < len + room) {
> +				if (!headroom)
> +					len += hole;
> +				rq->page_pool->frag_offset += hole;

Is there any reason why the driver needs to be aware of page_pool->frag_offset?
Won't page_pool_dev_alloc_frag() drain the last page for you when it is
later called with a size of 'len + room'?
One case I can think of that needs this is reporting an accurate truesize
for the skb, but I am not sure it matters that much, as the 'struct page_frag_cache'
and 'page_frag' implementations both have a similar problem.

> +			}
> +		} else {
> +			struct page *page;
>  
> -		page = page_pool_dev_alloc_pages(rq->page_pool);
> -		if (unlikely(!page))
> -			return -ENOMEM;
> -		buf = (char *)page_address(page);
> -		buf += headroom; /* advance address leaving hole at front of pkt */
> +			page = page_pool_dev_alloc_pages(rq->page_pool);
> +			if (unlikely(!page))
> +				return -ENOMEM;
> +			buf = (char *)page_address(page);
> +			buf += headroom; /* advance address leaving hole at front of pkt */
> +		}
>  	} else {
>  		if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
>  			return -ENOMEM;
> @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
>  	struct virtio_device *vdev = rq->vq->vdev;
>  
>  	struct page_pool_params pp_params = {
> -		.order = 0,
> +		.order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
>  		.pool_size = rq->vq->num_max,

If it is using order-SKB_FRAG_PAGE_ORDER pages, perhaps pool_size does
not have to be rq->vq->num_max? Even for order-0 pages, perhaps
pool_size does not need to be as big as rq->vq->num_max?

>  		.nid = dev_to_node(vdev->dev.parent),
>  		.dev = vdev->dev.parent,
>  		.offset = 0,
>  	};
>  
> +	if (page_pool_frag)
> +		pp_params.flags |= PP_FLAG_PAGE_FRAG;
> +
>  	rq->page_pool = page_pool_create(&pp_params);
>  	if (IS_ERR(rq->page_pool)) {
>  		dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
>
Liang Chen May 29, 2023, 7:29 a.m. UTC | #6
On Sun, May 28, 2023 at 2:25 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, May 26, 2023 at 01:46:19PM +0800, Liang Chen wrote:
> > To further enhance performance, implement page pool fragmentation
> > support and introduce a module parameter to enable or disable it.
> >
> > In single-core vm testing environments, there is an additional performance
> > gain observed in the normal path compared to the one packet per page
> > approach.
> >   Upstream codebase: 47.5 Gbits/sec
> >   Upstream codebase with page pool: 50.2 Gbits/sec
> >   Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec
> >
> > There is also some performance gain for XDP cpumap.
> >   Upstream codebase: 1.38 Gbits/sec
> >   Upstream codebase with page pool: 9.74 Gbits/sec
> >   Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
> >
> > Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
>
> I think it's called fragmenting not fragmentation.
>
>

Sure, thanks!

> > ---
> >  drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
> >  1 file changed, 55 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 99c0ca0c1781..ac40b8c66c59 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
> >  module_param(napi_tx, bool, 0644);
> >
> >  static bool page_pool_enabled;
> > +static bool page_pool_frag;
> >  module_param(page_pool_enabled, bool, 0400);
> > +module_param(page_pool_frag, bool, 0400);
> >
> >  /* FIXME: MTU in config. */
> >  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>
> So here again same questions.
>
> -when is this a net perf gain when does it have no effect?
> -can be on by default
> - can we get rid of the extra modes?
>
>

Yeah, now I believe it makes sense to enable it by default to avoid
the extra modes. Thanks.


> > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> >                                      struct page *p,
> >                                      int offset,
> >                                      int page_off,
> > -                                    unsigned int *len)
> > +                                    unsigned int *len,
> > +                                        unsigned int *pp_frag_offset)
> >  {
> >       int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> >       struct page *page;
> > +     unsigned int pp_frag_offset_val;
> >
> >       if (page_off + *len + tailroom > PAGE_SIZE)
> >               return NULL;
> >
> >       if (rq->page_pool)
> > -             page = page_pool_dev_alloc_pages(rq->page_pool);
> > +             if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > +                     page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
> > +                                                     PAGE_SIZE);
> > +             else
> > +                     page = page_pool_dev_alloc_pages(rq->page_pool);
> >       else
> >               page = alloc_page(GFP_ATOMIC);
> >
> >       if (!page)
> >               return NULL;
> >
> > -     memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> > +     pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
> > +
> > +     memcpy(page_address(page) + page_off + pp_frag_offset_val,
> > +            page_address(p) + offset, *len);
> >       page_off += *len;
> >
> >       while (--*num_buf) {
> > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> >                       goto err_buf;
> >               }
> >
> > -             memcpy(page_address(page) + page_off,
> > +             memcpy(page_address(page) + page_off + pp_frag_offset_val,
> >                      page_address(p) + off, buflen);
> >               page_off += buflen;
> >               virtnet_put_page(rq, p);
> > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> >                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> >               xdp_page = xdp_linearize_page(rq, &num_buf, page,
> >                                             offset, header_offset,
> > -                                           &tlen);
> > +                                           &tlen, NULL);
> >               if (!xdp_page)
> >                       goto err_xdp;
> >
> > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >       unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> >       struct page *xdp_page;
> >       unsigned int xdp_room;
> > +     unsigned int page_frag_offset = 0;
> >
> >       /* Transient failure which in theory could occur if
> >        * in-flight packets from before XDP was enabled reach
> > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >               xdp_page = xdp_linearize_page(rq, num_buf,
> >                                             *page, offset,
> >                                             VIRTIO_XDP_HEADROOM,
> > -                                           len);
> > +                                           len,
> > +                                               &page_frag_offset);
> >               if (!xdp_page)
> >                       return NULL;
> >       } else {
> > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >                       return NULL;
> >
> >               if (rq->page_pool)
> > -                     xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> > +                     if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > +                             xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
> > +                                                                 &page_frag_offset, PAGE_SIZE);
> > +                     else
> > +                             xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> >               else
> >                       xdp_page = alloc_page(GFP_ATOMIC);
> > +
> >               if (!xdp_page)
> >                       return NULL;
> >
> > -             memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> > -                    page_address(*page) + offset, *len);
> > +             memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
> > +                             page_frag_offset, page_address(*page) + offset, *len);
> >       }
> >
> >       *frame_sz = PAGE_SIZE;
> > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >
> >       *page = xdp_page;
> >
> > -     return page_address(*page) + VIRTIO_XDP_HEADROOM;
> > +     return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
> >  }
> >
> >  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> >       void *ctx;
> >       int err;
> >       unsigned int len, hole;
> > +     unsigned int pp_frag_offset;
> >
> >       /* Extra tailroom is needed to satisfy XDP's assumption. This
> >        * means rx frags coalescing won't work, but consider we've
> > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> >        */
> >       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> >       if (rq->page_pool) {
> > -             struct page *page;
> > +             if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> > +                     if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> > +                                                            &pp_frag_offset, len + room)))
> > +                             return -ENOMEM;
> > +                     buf = (char *)page_address(rq->page_pool->frag_page) +
> > +                             pp_frag_offset;
> > +                     buf += headroom; /* advance address leaving hole at front of pkt */
> > +                     hole = (PAGE_SIZE << rq->page_pool->p.order)
> > +                             - rq->page_pool->frag_offset;
> > +                     if (hole < len + room) {
> > +                             if (!headroom)
> > +                                     len += hole;
> > +                             rq->page_pool->frag_offset += hole;
> > +                     }
> > +             } else {
> > +                     struct page *page;
> >
> > -             page = page_pool_dev_alloc_pages(rq->page_pool);
> > -             if (unlikely(!page))
> > -                     return -ENOMEM;
> > -             buf = (char *)page_address(page);
> > -             buf += headroom; /* advance address leaving hole at front of pkt */
> > +                     page = page_pool_dev_alloc_pages(rq->page_pool);
> > +                     if (unlikely(!page))
> > +                             return -ENOMEM;
> > +                     buf = (char *)page_address(page);
> > +                     buf += headroom; /* advance address leaving hole at front of pkt */
> > +             }
> >       } else {
> >               if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
> >                       return -ENOMEM;
> > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
> >       struct virtio_device *vdev = rq->vq->vdev;
> >
> >       struct page_pool_params pp_params = {
> > -             .order = 0,
> > +             .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
> >               .pool_size = rq->vq->num_max,
> >               .nid = dev_to_node(vdev->dev.parent),
> >               .dev = vdev->dev.parent,
> >               .offset = 0,
> >       };
> >
> > +     if (page_pool_frag)
> > +             pp_params.flags |= PP_FLAG_PAGE_FRAG;
> > +
> >       rq->page_pool = page_pool_create(&pp_params);
> >       if (IS_ERR(rq->page_pool)) {
> >               dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> > --
> > 2.31.1
>
Liang Chen May 29, 2023, 7:30 a.m. UTC | #7
On Mon, May 29, 2023 at 9:33 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2023/5/26 13:46, Liang Chen wrote:
>
> ...
>
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 99c0ca0c1781..ac40b8c66c59 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
> >  module_param(napi_tx, bool, 0644);
> >
> >  static bool page_pool_enabled;
> > +static bool page_pool_frag;
> >  module_param(page_pool_enabled, bool, 0400);
> > +module_param(page_pool_frag, bool, 0400);
>
> The patchset below unifies frag and non-frag pages for the
> page_pool_alloc_frag() API; perhaps it would simplify the
> driver's support of page pool.
>
> https://patchwork.kernel.org/project/netdevbpf/cover/20230526092616.40355-1-linyunsheng@huawei.com/
>

Thanks for the information and the work to make driver support easy. I
will rebase accordingly after it lands.

> >
>
> ...
>
> > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> >        */
> >       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> >       if (rq->page_pool) {
> > -             struct page *page;
> > +             if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> > +                     if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> > +                                                            &pp_frag_offset, len + room)))
> > +                             return -ENOMEM;
> > +                     buf = (char *)page_address(rq->page_pool->frag_page) +
> > +                             pp_frag_offset;
> > +                     buf += headroom; /* advance address leaving hole at front of pkt */
> > +                     hole = (PAGE_SIZE << rq->page_pool->p.order)
> > +                             - rq->page_pool->frag_offset;
> > +                     if (hole < len + room) {
> > +                             if (!headroom)
> > +                                     len += hole;
> > +                             rq->page_pool->frag_offset += hole;
>
> Is there any reason why the driver needs to be aware of page_pool->frag_offset?
> Won't page_pool_dev_alloc_frag() drain the last page for you when it is
> later called with a size of 'len + room'?
> One case I can think of that needs this is reporting an accurate truesize
> for the skb, but I am not sure it matters that much, as the 'struct page_frag_cache'
> and 'page_frag' implementations both have a similar problem.
>

Yeah, as you pointed out, page_pool_dev_alloc_frag will drain the page
itself, and so does skb_page_frag_refill. This is trying to keep the logic
consistent with the non-page-pool case, where the hole was skipped and
included in the buffer len.

> > +                     }
> > +             } else {
> > +                     struct page *page;
> >
> > -             page = page_pool_dev_alloc_pages(rq->page_pool);
> > -             if (unlikely(!page))
> > -                     return -ENOMEM;
> > -             buf = (char *)page_address(page);
> > -             buf += headroom; /* advance address leaving hole at front of pkt */
> > +                     page = page_pool_dev_alloc_pages(rq->page_pool);
> > +                     if (unlikely(!page))
> > +                             return -ENOMEM;
> > +                     buf = (char *)page_address(page);
> > +                     buf += headroom; /* advance address leaving hole at front of pkt */
> > +             }
> >       } else {
> >               if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
> >                       return -ENOMEM;
> > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
> >       struct virtio_device *vdev = rq->vq->vdev;
> >
> >       struct page_pool_params pp_params = {
> > -             .order = 0,
> > +             .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
> >               .pool_size = rq->vq->num_max,
>
> If it is using order-SKB_FRAG_PAGE_ORDER pages, perhaps pool_size does
> not have to be rq->vq->num_max? Even for order-0 pages, perhaps
> pool_size does not need to be as big as rq->vq->num_max?
>

Thanks for pointing this out! pool_size will be lowered to a more
appropriate value in v2.


> >               .nid = dev_to_node(vdev->dev.parent),
> >               .dev = vdev->dev.parent,
> >               .offset = 0,
> >       };
> >
> > +     if (page_pool_frag)
> > +             pp_params.flags |= PP_FLAG_PAGE_FRAG;
> > +
> >       rq->page_pool = page_pool_create(&pp_params);
> >       if (IS_ERR(rq->page_pool)) {
> >               dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> >
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 99c0ca0c1781..ac40b8c66c59 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -32,7 +32,9 @@  module_param(gso, bool, 0444);
 module_param(napi_tx, bool, 0644);
 
 static bool page_pool_enabled;
+static bool page_pool_frag;
 module_param(page_pool_enabled, bool, 0400);
+module_param(page_pool_frag, bool, 0400);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
@@ -909,23 +911,32 @@  static struct page *xdp_linearize_page(struct receive_queue *rq,
 				       struct page *p,
 				       int offset,
 				       int page_off,
-				       unsigned int *len)
+				       unsigned int *len,
+					   unsigned int *pp_frag_offset)
 {
 	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	struct page *page;
+	unsigned int pp_frag_offset_val;
 
 	if (page_off + *len + tailroom > PAGE_SIZE)
 		return NULL;
 
 	if (rq->page_pool)
-		page = page_pool_dev_alloc_pages(rq->page_pool);
+		if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
+			page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
+							PAGE_SIZE);
+		else
+			page = page_pool_dev_alloc_pages(rq->page_pool);
 	else
 		page = alloc_page(GFP_ATOMIC);
 
 	if (!page)
 		return NULL;
 
-	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
+
+	memcpy(page_address(page) + page_off + pp_frag_offset_val,
+	       page_address(p) + offset, *len);
 	page_off += *len;
 
 	while (--*num_buf) {
@@ -948,7 +959,7 @@  static struct page *xdp_linearize_page(struct receive_queue *rq,
 			goto err_buf;
 		}
 
-		memcpy(page_address(page) + page_off,
+		memcpy(page_address(page) + page_off + pp_frag_offset_val,
 		       page_address(p) + off, buflen);
 		page_off += buflen;
 		virtnet_put_page(rq, p);
@@ -1029,7 +1040,7 @@  static struct sk_buff *receive_small_xdp(struct net_device *dev,
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		xdp_page = xdp_linearize_page(rq, &num_buf, page,
 					      offset, header_offset,
-					      &tlen);
+					      &tlen, NULL);
 		if (!xdp_page)
 			goto err_xdp;
 
@@ -1323,6 +1334,7 @@  static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	struct page *xdp_page;
 	unsigned int xdp_room;
+	unsigned int page_frag_offset = 0;
 
 	/* Transient failure which in theory could occur if
 	 * in-flight packets from before XDP was enabled reach
@@ -1356,7 +1368,8 @@  static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 		xdp_page = xdp_linearize_page(rq, num_buf,
 					      *page, offset,
 					      VIRTIO_XDP_HEADROOM,
-					      len);
+					      len,
+						  &page_frag_offset);
 		if (!xdp_page)
 			return NULL;
 	} else {
@@ -1366,14 +1379,19 @@  static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 			return NULL;
 
 		if (rq->page_pool)
-			xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
+			if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
+				xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
+								    &page_frag_offset, PAGE_SIZE);
+			else
+				xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
 		else
 			xdp_page = alloc_page(GFP_ATOMIC);
+
 		if (!xdp_page)
 			return NULL;
 
-		memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
-		       page_address(*page) + offset, *len);
+		memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
+				page_frag_offset, page_address(*page) + offset, *len);
 	}
 
 	*frame_sz = PAGE_SIZE;
@@ -1382,7 +1400,7 @@  static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 
 	*page = xdp_page;
 
-	return page_address(*page) + VIRTIO_XDP_HEADROOM;
+	return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
 }
 
 static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
@@ -1762,6 +1780,7 @@  static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	void *ctx;
 	int err;
 	unsigned int len, hole;
+	unsigned int pp_frag_offset;
 
 	/* Extra tailroom is needed to satisfy XDP's assumption. This
 	 * means rx frags coalescing won't work, but consider we've
@@ -1769,13 +1788,29 @@  static int add_recvbuf_mergeable(struct virtnet_info *vi,
 	 */
 	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
 	if (rq->page_pool) {
-		struct page *page;
+		if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
+			if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
+							       &pp_frag_offset, len + room)))
+				return -ENOMEM;
+			buf = (char *)page_address(rq->page_pool->frag_page) +
+				pp_frag_offset;
+			buf += headroom; /* advance address leaving hole at front of pkt */
+			hole = (PAGE_SIZE << rq->page_pool->p.order)
+				- rq->page_pool->frag_offset;
+			if (hole < len + room) {
+				if (!headroom)
+					len += hole;
+				rq->page_pool->frag_offset += hole;
+			}
+		} else {
+			struct page *page;
 
-		page = page_pool_dev_alloc_pages(rq->page_pool);
-		if (unlikely(!page))
-			return -ENOMEM;
-		buf = (char *)page_address(page);
-		buf += headroom; /* advance address leaving hole at front of pkt */
+			page = page_pool_dev_alloc_pages(rq->page_pool);
+			if (unlikely(!page))
+				return -ENOMEM;
+			buf = (char *)page_address(page);
+			buf += headroom; /* advance address leaving hole at front of pkt */
+		}
 	} else {
 		if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 			return -ENOMEM;
@@ -3800,13 +3835,16 @@  static void virtnet_alloc_page_pool(struct receive_queue *rq)
 	struct virtio_device *vdev = rq->vq->vdev;
 
 	struct page_pool_params pp_params = {
-		.order = 0,
+		.order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
 		.pool_size = rq->vq->num_max,
 		.nid = dev_to_node(vdev->dev.parent),
 		.dev = vdev->dev.parent,
 		.offset = 0,
 	};
 
+	if (page_pool_frag)
+		pp_params.flags |= PP_FLAG_PAGE_FRAG;
+
 	rq->page_pool = page_pool_create(&pp_params);
 	if (IS_ERR(rq->page_pool)) {
 		dev_warn(&vdev->dev, "page pool creation failed: %ld\n",