Message ID | 20230526054621.18371-3-liangchen.linux@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,1/5] virtio_net: Fix an unsafe reference to the page chain | expand |
The 05/26/2023 13:46, Liang Chen wrote: Hi Liang, > > To further enhance performance, implement page pool fragmentation > support and introduce a module parameter to enable or disable it. > > In single-core vm testing environments, there is an additional performance > gain observed in the normal path compared to the one packet per page > approach. > Upstream codebase: 47.5 Gbits/sec > Upstream codebase with page pool: 50.2 Gbits/sec > Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec > > There is also some performance gain for XDP cpumap. > Upstream codebase: 1.38 Gbits/sec > Upstream codebase with page pool: 9.74 Gbits/sec > Upstream codebase with page pool fragmentation: 10.3 Gbits/sec > > Signed-off-by: Liang Chen <liangchen.linux@gmail.com> > --- > drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++---------- > 1 file changed, 55 insertions(+), 17 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 99c0ca0c1781..ac40b8c66c59 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > module_param(napi_tx, bool, 0644); > > static bool page_pool_enabled; > +static bool page_pool_frag; > module_param(page_pool_enabled, bool, 0400); > +module_param(page_pool_frag, bool, 0400); > > /* FIXME: MTU in config. */ > #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > struct page *p, > int offset, > int page_off, > - unsigned int *len) > + unsigned int *len, > + unsigned int *pp_frag_offset) The 'unsigned int *pp_frag_offset' seems to be unaligned. > { > int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > struct page *page; > + unsigned int pp_frag_offset_val; Please use reverse christmas tree notation here. 
The pp_frag_offset_val needs to be declared before page. > > if (page_off + *len + tailroom > PAGE_SIZE) > return NULL; > > if (rq->page_pool) > - page = page_pool_dev_alloc_pages(rq->page_pool); > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset, > + PAGE_SIZE); Don't you need to check whether pp_frag_offset is NULL here, since one caller passes NULL? > + else > + page = page_pool_dev_alloc_pages(rq->page_pool); > else > page = alloc_page(GFP_ATOMIC); > > if (!page) > return NULL; > > - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); > + pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0; > + > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > + page_address(p) + offset, *len); > page_off += *len; > > while (--*num_buf) { > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > goto err_buf; > } > > - memcpy(page_address(page) + page_off, > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > page_address(p) + off, buflen); > page_off += buflen; > virtnet_put_page(rq, p); > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > xdp_page = xdp_linearize_page(rq, &num_buf, page, > offset, header_offset, > - &tlen); > + &tlen, NULL); > if (!xdp_page) > goto err_xdp; > > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > struct page *xdp_page; > unsigned int xdp_room; > + unsigned int page_frag_offset = 0; Please use reverse x-mas tree notation. 
> > /* Transient failure which in theory could occur if > * in-flight packets from before XDP was enabled reach > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > xdp_page = xdp_linearize_page(rq, num_buf, > *page, offset, > VIRTIO_XDP_HEADROOM, > - len); > + len, > + &page_frag_offset); You have also here some misalignment with regards to page_frag_offset. > if (!xdp_page) > return NULL; > } else { > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > return NULL; > > if (rq->page_pool) > - xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > + xdp_page = page_pool_dev_alloc_frag(rq->page_pool, > + &page_frag_offset, PAGE_SIZE); > + else > + xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > else > xdp_page = alloc_page(GFP_ATOMIC); > + > if (!xdp_page) > return NULL; > > - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, > - page_address(*page) + offset, *len); > + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM + > + page_frag_offset, page_address(*page) + offset, *len); > } > > *frame_sz = PAGE_SIZE; > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > *page = xdp_page; > > - return page_address(*page) + VIRTIO_XDP_HEADROOM; > + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset; > } > > static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > void *ctx; > int err; > unsigned int len, hole; > + unsigned int pp_frag_offset; There same here. > > /* Extra tailroom is needed to satisfy XDP's assumption. 
This > * means rx frags coalescing won't work, but consider we've > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > */ > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > if (rq->page_pool) { > - struct page *page; > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > + &pp_frag_offset, len + room))) > + return -ENOMEM; > + buf = (char *)page_address(rq->page_pool->frag_page) + > + pp_frag_offset; > + buf += headroom; /* advance address leaving hole at front of pkt */ > + hole = (PAGE_SIZE << rq->page_pool->p.order) > + - rq->page_pool->frag_offset; > + if (hole < len + room) { > + if (!headroom) > + len += hole; > + rq->page_pool->frag_offset += hole; > + } > + } else { > + struct page *page; > > - page = page_pool_dev_alloc_pages(rq->page_pool); > - if (unlikely(!page)) > - return -ENOMEM; > - buf = (char *)page_address(page); > - buf += headroom; /* advance address leaving hole at front of pkt */ > + page = page_pool_dev_alloc_pages(rq->page_pool); > + if (unlikely(!page)) > + return -ENOMEM; > + buf = (char *)page_address(page); > + buf += headroom; /* advance address leaving hole at front of pkt */ > + } > } else { > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > return -ENOMEM; > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > struct virtio_device *vdev = rq->vq->vdev; > > struct page_pool_params pp_params = { > - .order = 0, > + .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0, > .pool_size = rq->vq->num_max, > .nid = dev_to_node(vdev->dev.parent), > .dev = vdev->dev.parent, > .offset = 0, > }; > > + if (page_pool_frag) > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > + > rq->page_pool = page_pool_create(&pp_params); > if (IS_ERR(rq->page_pool)) { > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", > -- > 2.31.1 > >
Hi Liang, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/virtio_net-Add-page_pool-support-to-improve-performance/20230526-135805 base: net-next/main patch link: https://lore.kernel.org/r/20230526054621.18371-3-liangchen.linux%40gmail.com patch subject: [PATCH net-next 3/5] virtio_net: Add page pool fragmentation support config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230527/202305270116.TJ31IjNL-lkp@intel.com/config) compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 reproduce (this is a W=1 build): # https://github.com/intel-lab-lkp/linux/commit/dda0469e059354b61192e1d25b77c57351346282 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Liang-Chen/virtio_net-Add-page_pool-support-to-improve-performance/20230526-135805 git checkout dda0469e059354b61192e1d25b77c57351346282 # save the config file mkdir build_dir && cp config build_dir/.config make W=1 O=build_dir ARCH=x86_64 olddefconfig make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202305270116.TJ31IjNL-lkp@intel.com/ All errors (new ones prefixed by >>): ld: vmlinux.o: in function `virtnet_find_vqs': virtio_net.c:(.text+0x901fd2): undefined reference to `page_pool_create' ld: vmlinux.o: in function `add_recvbuf_mergeable.isra.0': virtio_net.c:(.text+0x905662): undefined reference to `page_pool_alloc_pages' >> ld: virtio_net.c:(.text+0x905715): undefined reference to `page_pool_alloc_frag' ld: vmlinux.o: in function `xdp_linearize_page': virtio_net.c:(.text+0x906c50): undefined reference to `page_pool_alloc_pages' ld: virtio_net.c:(.text+0x906e33): undefined reference to `page_pool_alloc_frag' ld: vmlinux.o: in function `mergeable_xdp_get_buf.isra.0': >> virtio_net.c:(.text+0x90740e): 
undefined reference to `page_pool_alloc_frag' >> ld: virtio_net.c:(.text+0x90750b): undefined reference to `page_pool_alloc_pages'
On Fri, May 26, 2023 at 4:29 PM Horatiu Vultur <horatiu.vultur@microchip.com> wrote: > > The 05/26/2023 13:46, Liang Chen wrote: > > Hi Liang, > > > > > To further enhance performance, implement page pool fragmentation > > support and introduce a module parameter to enable or disable it. > > > > In single-core vm testing environments, there is an additional performance > > gain observed in the normal path compared to the one packet per page > > approach. > > Upstream codebase: 47.5 Gbits/sec > > Upstream codebase with page pool: 50.2 Gbits/sec > > Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec > > > > There is also some performance gain for XDP cpumap. > > Upstream codebase: 1.38 Gbits/sec > > Upstream codebase with page pool: 9.74 Gbits/sec > > Upstream codebase with page pool fragmentation: 10.3 Gbits/sec > > > > Signed-off-by: Liang Chen <liangchen.linux@gmail.com> > > --- > > drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++---------- > > 1 file changed, 55 insertions(+), 17 deletions(-) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index 99c0ca0c1781..ac40b8c66c59 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > > module_param(napi_tx, bool, 0644); > > > > static bool page_pool_enabled; > > +static bool page_pool_frag; > > module_param(page_pool_enabled, bool, 0400); > > +module_param(page_pool_frag, bool, 0400); > > > > /* FIXME: MTU in config. */ > > #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) > > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > > struct page *p, > > int offset, > > int page_off, > > - unsigned int *len) > > + unsigned int *len, > > + unsigned int *pp_frag_offset) > > The 'unsigned int *pp_frag_offset' seems to be unaligned. > Sure, Thanks! 
> > { > > int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > > struct page *page; > > + unsigned int pp_frag_offset_val; > > Please use reverse christmas tree notation here. The pp_frag_offset_val > needs to be declared before page; > Sure. Will do on v2. > > > > if (page_off + *len + tailroom > PAGE_SIZE) > > return NULL; > > > > if (rq->page_pool) > > - page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > > + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset, > > + PAGE_SIZE); > > Don't you need to check if pp_frag_offset is null? As you call once with > NULL. > At the moment, page_pool is enabled only for mergeable mode, and the path leading to a call with NULL pp_frag_offset is from small mode. But I will evaluate again whether it is beneficial to support page_pool for small mode on v2. Thanks. > > + else > > + page = page_pool_dev_alloc_pages(rq->page_pool); > > else > > page = alloc_page(GFP_ATOMIC); > > > > if (!page) > > return NULL; > > > > - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); > > + pp_frag_offset_val = pp_frag_offset ? 
*pp_frag_offset : 0; > > + > > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > > + page_address(p) + offset, *len); > > page_off += *len; > > > > while (--*num_buf) { > > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > > goto err_buf; > > } > > > > - memcpy(page_address(page) + page_off, > > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > > page_address(p) + off, buflen); > > page_off += buflen; > > virtnet_put_page(rq, p); > > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, > > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > > xdp_page = xdp_linearize_page(rq, &num_buf, page, > > offset, header_offset, > > - &tlen); > > + &tlen, NULL); > > if (!xdp_page) > > goto err_xdp; > > > > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > > struct page *xdp_page; > > unsigned int xdp_room; > > + unsigned int page_frag_offset = 0; > > Please use reverse x-mas tree notation. > Sure. Will do on v2. > > > > /* Transient failure which in theory could occur if > > * in-flight packets from before XDP was enabled reach > > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > xdp_page = xdp_linearize_page(rq, num_buf, > > *page, offset, > > VIRTIO_XDP_HEADROOM, > > - len); > > + len, > > + &page_frag_offset); > > You have also here some misalignment with regards to page_frag_offset. > Sure, Thanks! 
> > if (!xdp_page) > > return NULL; > > } else { > > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > return NULL; > > > > if (rq->page_pool) > > - xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > > + xdp_page = page_pool_dev_alloc_frag(rq->page_pool, > > + &page_frag_offset, PAGE_SIZE); > > + else > > + xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > > else > > xdp_page = alloc_page(GFP_ATOMIC); > > + > > if (!xdp_page) > > return NULL; > > > > - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, > > - page_address(*page) + offset, *len); > > + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM + > > + page_frag_offset, page_address(*page) + offset, *len); > > } > > > > *frame_sz = PAGE_SIZE; > > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > > > *page = xdp_page; > > > > - return page_address(*page) + VIRTIO_XDP_HEADROOM; > > + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset; > > } > > > > static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, > > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > > void *ctx; > > int err; > > unsigned int len, hole; > > + unsigned int pp_frag_offset; > > There same here. > Sure, Thanks! > > > > /* Extra tailroom is needed to satisfy XDP's assumption. 
This > > * means rx frags coalescing won't work, but consider we've > > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > > */ > > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > > if (rq->page_pool) { > > - struct page *page; > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > > + &pp_frag_offset, len + room))) > > + return -ENOMEM; > > + buf = (char *)page_address(rq->page_pool->frag_page) + > > + pp_frag_offset; > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + hole = (PAGE_SIZE << rq->page_pool->p.order) > > + - rq->page_pool->frag_offset; > > + if (hole < len + room) { > > + if (!headroom) > > + len += hole; > > + rq->page_pool->frag_offset += hole; > > + } > > + } else { > > + struct page *page; > > > > - page = page_pool_dev_alloc_pages(rq->page_pool); > > - if (unlikely(!page)) > > - return -ENOMEM; > > - buf = (char *)page_address(page); > > - buf += headroom; /* advance address leaving hole at front of pkt */ > > + page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (unlikely(!page)) > > + return -ENOMEM; > > + buf = (char *)page_address(page); > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + } > > } else { > > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > > return -ENOMEM; > > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > > struct virtio_device *vdev = rq->vq->vdev; > > > > struct page_pool_params pp_params = { > > - .order = 0, > > + .order = page_pool_frag ? 
SKB_FRAG_PAGE_ORDER : 0, > > .pool_size = rq->vq->num_max, > > .nid = dev_to_node(vdev->dev.parent), > > .dev = vdev->dev.parent, > > .offset = 0, > > }; > > > > + if (page_pool_frag) > > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > > + > > rq->page_pool = page_pool_create(&pp_params); > > if (IS_ERR(rq->page_pool)) { > > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", > > -- > > 2.31.1 > > > > > > -- > /Horatiu
On Fri, May 26, 2023 at 01:46:19PM +0800, Liang Chen wrote: > To further enhance performance, implement page pool fragmentation > support and introduce a module parameter to enable or disable it. > > In single-core vm testing environments, there is an additional performance > gain observed in the normal path compared to the one packet per page > approach. > Upstream codebase: 47.5 Gbits/sec > Upstream codebase with page pool: 50.2 Gbits/sec > Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec > > There is also some performance gain for XDP cpumap. > Upstream codebase: 1.38 Gbits/sec > Upstream codebase with page pool: 9.74 Gbits/sec > Upstream codebase with page pool fragmentation: 10.3 Gbits/sec > > Signed-off-by: Liang Chen <liangchen.linux@gmail.com> I think it's called fragmenting, not fragmentation. > --- > drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++---------- > 1 file changed, 55 insertions(+), 17 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 99c0ca0c1781..ac40b8c66c59 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > module_param(napi_tx, bool, 0644); > > static bool page_pool_enabled; > +static bool page_pool_frag; > module_param(page_pool_enabled, bool, 0400); > +module_param(page_pool_frag, bool, 0400); > > /* FIXME: MTU in config. */ > #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) So here again, the same questions: - When is this a net perf gain, and when does it have no effect? - Can it be on by default? - Can we get rid of the extra modes? 
> @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > struct page *p, > int offset, > int page_off, > - unsigned int *len) > + unsigned int *len, > + unsigned int *pp_frag_offset) > { > int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > struct page *page; > + unsigned int pp_frag_offset_val; > > if (page_off + *len + tailroom > PAGE_SIZE) > return NULL; > > if (rq->page_pool) > - page = page_pool_dev_alloc_pages(rq->page_pool); > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset, > + PAGE_SIZE); > + else > + page = page_pool_dev_alloc_pages(rq->page_pool); > else > page = alloc_page(GFP_ATOMIC); > > if (!page) > return NULL; > > - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); > + pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0; > + > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > + page_address(p) + offset, *len); > page_off += *len; > > while (--*num_buf) { > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > goto err_buf; > } > > - memcpy(page_address(page) + page_off, > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > page_address(p) + off, buflen); > page_off += buflen; > virtnet_put_page(rq, p); > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > xdp_page = xdp_linearize_page(rq, &num_buf, page, > offset, header_offset, > - &tlen); > + &tlen, NULL); > if (!xdp_page) > goto err_xdp; > > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > struct page *xdp_page; > unsigned int xdp_room; > + unsigned int page_frag_offset = 0; > > /* Transient failure which in theory could occur if > * in-flight packets from before XDP was enabled reach > @@ -1356,7 +1368,8 @@ static void 
*mergeable_xdp_get_buf(struct virtnet_info *vi, > xdp_page = xdp_linearize_page(rq, num_buf, > *page, offset, > VIRTIO_XDP_HEADROOM, > - len); > + len, > + &page_frag_offset); > if (!xdp_page) > return NULL; > } else { > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > return NULL; > > if (rq->page_pool) > - xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > + xdp_page = page_pool_dev_alloc_frag(rq->page_pool, > + &page_frag_offset, PAGE_SIZE); > + else > + xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > else > xdp_page = alloc_page(GFP_ATOMIC); > + > if (!xdp_page) > return NULL; > > - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, > - page_address(*page) + offset, *len); > + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM + > + page_frag_offset, page_address(*page) + offset, *len); > } > > *frame_sz = PAGE_SIZE; > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > *page = xdp_page; > > - return page_address(*page) + VIRTIO_XDP_HEADROOM; > + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset; > } > > static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > void *ctx; > int err; > unsigned int len, hole; > + unsigned int pp_frag_offset; > > /* Extra tailroom is needed to satisfy XDP's assumption. 
This > * means rx frags coalescing won't work, but consider we've > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > */ > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > if (rq->page_pool) { > - struct page *page; > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > + &pp_frag_offset, len + room))) > + return -ENOMEM; > + buf = (char *)page_address(rq->page_pool->frag_page) + > + pp_frag_offset; > + buf += headroom; /* advance address leaving hole at front of pkt */ > + hole = (PAGE_SIZE << rq->page_pool->p.order) > + - rq->page_pool->frag_offset; > + if (hole < len + room) { > + if (!headroom) > + len += hole; > + rq->page_pool->frag_offset += hole; > + } > + } else { > + struct page *page; > > - page = page_pool_dev_alloc_pages(rq->page_pool); > - if (unlikely(!page)) > - return -ENOMEM; > - buf = (char *)page_address(page); > - buf += headroom; /* advance address leaving hole at front of pkt */ > + page = page_pool_dev_alloc_pages(rq->page_pool); > + if (unlikely(!page)) > + return -ENOMEM; > + buf = (char *)page_address(page); > + buf += headroom; /* advance address leaving hole at front of pkt */ > + } > } else { > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > return -ENOMEM; > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > struct virtio_device *vdev = rq->vq->vdev; > > struct page_pool_params pp_params = { > - .order = 0, > + .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0, > .pool_size = rq->vq->num_max, > .nid = dev_to_node(vdev->dev.parent), > .dev = vdev->dev.parent, > .offset = 0, > }; > > + if (page_pool_frag) > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > + > rq->page_pool = page_pool_create(&pp_params); > if (IS_ERR(rq->page_pool)) { > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", > -- > 2.31.1
On 2023/5/26 13:46, Liang Chen wrote: ... > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 99c0ca0c1781..ac40b8c66c59 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > module_param(napi_tx, bool, 0644); > > static bool page_pool_enabled; > +static bool page_pool_frag; > module_param(page_pool_enabled, bool, 0400); > +module_param(page_pool_frag, bool, 0400); The patchset below unifies the frag and non-frag page handling for the page_pool_alloc_frag() API; perhaps it would simplify the driver's support of page pool. https://patchwork.kernel.org/project/netdevbpf/cover/20230526092616.40355-1-linyunsheng@huawei.com/ > ... > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > */ > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > if (rq->page_pool) { > - struct page *page; > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > + &pp_frag_offset, len + room))) > + return -ENOMEM; > + buf = (char *)page_address(rq->page_pool->frag_page) + > + pp_frag_offset; > + buf += headroom; /* advance address leaving hole at front of pkt */ > + hole = (PAGE_SIZE << rq->page_pool->p.order) > + - rq->page_pool->frag_offset; > + if (hole < len + room) { > + if (!headroom) > + len += hole; > + rq->page_pool->frag_offset += hole; Is there any reason why the driver needs to be aware of page_pool->frag_offset? Won't page_pool_dev_alloc_frag() drain the last page for you when it is later called with a size of 'len + room'? One case I can think of that needs this is having an accurate truesize report for the skb, but I am not sure it matters that much, as the 'struct page_frag_cache' and 'page_frag' implementations both have a similar problem. 
> + } > + } else { > + struct page *page; > > - page = page_pool_dev_alloc_pages(rq->page_pool); > - if (unlikely(!page)) > - return -ENOMEM; > - buf = (char *)page_address(page); > - buf += headroom; /* advance address leaving hole at front of pkt */ > + page = page_pool_dev_alloc_pages(rq->page_pool); > + if (unlikely(!page)) > + return -ENOMEM; > + buf = (char *)page_address(page); > + buf += headroom; /* advance address leaving hole at front of pkt */ > + } > } else { > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > return -ENOMEM; > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > struct virtio_device *vdev = rq->vq->vdev; > > struct page_pool_params pp_params = { > - .order = 0, > + .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0, > .pool_size = rq->vq->num_max, If it using order SKB_FRAG_PAGE_ORDER page, perhaps pool_size does not have to be rq->vq->num_max? Even for order 0 page, perhaps the pool_size does not need to be as big as rq->vq->num_max? > .nid = dev_to_node(vdev->dev.parent), > .dev = vdev->dev.parent, > .offset = 0, > }; > > + if (page_pool_frag) > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > + > rq->page_pool = page_pool_create(&pp_params); > if (IS_ERR(rq->page_pool)) { > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", >
On Sun, May 28, 2023 at 2:25 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Fri, May 26, 2023 at 01:46:19PM +0800, Liang Chen wrote: > > To further enhance performance, implement page pool fragmentation > > support and introduce a module parameter to enable or disable it. > > > > In single-core vm testing environments, there is an additional performance > > gain observed in the normal path compared to the one packet per page > > approach. > > Upstream codebase: 47.5 Gbits/sec > > Upstream codebase with page pool: 50.2 Gbits/sec > > Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec > > > > There is also some performance gain for XDP cpumap. > > Upstream codebase: 1.38 Gbits/sec > > Upstream codebase with page pool: 9.74 Gbits/sec > > Upstream codebase with page pool fragmentation: 10.3 Gbits/sec > > > > Signed-off-by: Liang Chen <liangchen.linux@gmail.com> > > I think it's called fragmenting not fragmentation. > > Sure, thanks! > > --- > > drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++---------- > > 1 file changed, 55 insertions(+), 17 deletions(-) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index 99c0ca0c1781..ac40b8c66c59 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > > module_param(napi_tx, bool, 0644); > > > > static bool page_pool_enabled; > > +static bool page_pool_frag; > > module_param(page_pool_enabled, bool, 0400); > > +module_param(page_pool_frag, bool, 0400); > > > > /* FIXME: MTU in config. */ > > #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) > > So here again same questions. > > -when is this a net perf gain when does it have no effect? > -can be on by default > - can we get rid of the extra modes? > > Yeah, now I believe it makes sense to enable it by default to avoid the extra modes. Thanks. 
> > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > > struct page *p, > > int offset, > > int page_off, > > - unsigned int *len) > > + unsigned int *len, > > + unsigned int *pp_frag_offset) > > { > > int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > > struct page *page; > > + unsigned int pp_frag_offset_val; > > > > if (page_off + *len + tailroom > PAGE_SIZE) > > return NULL; > > > > if (rq->page_pool) > > - page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > > + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset, > > + PAGE_SIZE); > > + else > > + page = page_pool_dev_alloc_pages(rq->page_pool); > > else > > page = alloc_page(GFP_ATOMIC); > > > > if (!page) > > return NULL; > > > > - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); > > + pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0; > > + > > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > > + page_address(p) + offset, *len); > > page_off += *len; > > > > while (--*num_buf) { > > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > > goto err_buf; > > } > > > > - memcpy(page_address(page) + page_off, > > + memcpy(page_address(page) + page_off + pp_frag_offset_val, > > page_address(p) + off, buflen); > > page_off += buflen; > > virtnet_put_page(rq, p); > > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, > > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > > xdp_page = xdp_linearize_page(rq, &num_buf, page, > > offset, header_offset, > > - &tlen); > > + &tlen, NULL); > > if (!xdp_page) > > goto err_xdp; > > > > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > > struct page *xdp_page; > > unsigned int xdp_room; > > + unsigned int page_frag_offset = 0; > > > > /* Transient failure 
which in theory could occur if > > * in-flight packets from before XDP was enabled reach > > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > xdp_page = xdp_linearize_page(rq, num_buf, > > *page, offset, > > VIRTIO_XDP_HEADROOM, > > - len); > > + len, > > + &page_frag_offset); > > if (!xdp_page) > > return NULL; > > } else { > > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > return NULL; > > > > if (rq->page_pool) > > - xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) > > + xdp_page = page_pool_dev_alloc_frag(rq->page_pool, > > + &page_frag_offset, PAGE_SIZE); > > + else > > + xdp_page = page_pool_dev_alloc_pages(rq->page_pool); > > else > > xdp_page = alloc_page(GFP_ATOMIC); > > + > > if (!xdp_page) > > return NULL; > > > > - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, > > - page_address(*page) + offset, *len); > > + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM + > > + page_frag_offset, page_address(*page) + offset, *len); > > } > > > > *frame_sz = PAGE_SIZE; > > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, > > > > *page = xdp_page; > > > > - return page_address(*page) + VIRTIO_XDP_HEADROOM; > > + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset; > > } > > > > static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, > > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > > void *ctx; > > int err; > > unsigned int len, hole; > > + unsigned int pp_frag_offset; > > > > /* Extra tailroom is needed to satisfy XDP's assumption. 
This > > * means rx frags coalescing won't work, but consider we've > > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > > */ > > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > > if (rq->page_pool) { > > - struct page *page; > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > > + &pp_frag_offset, len + room))) > > + return -ENOMEM; > > + buf = (char *)page_address(rq->page_pool->frag_page) + > > + pp_frag_offset; > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + hole = (PAGE_SIZE << rq->page_pool->p.order) > > + - rq->page_pool->frag_offset; > > + if (hole < len + room) { > > + if (!headroom) > > + len += hole; > > + rq->page_pool->frag_offset += hole; > > + } > > + } else { > > + struct page *page; > > > > - page = page_pool_dev_alloc_pages(rq->page_pool); > > - if (unlikely(!page)) > > - return -ENOMEM; > > - buf = (char *)page_address(page); > > - buf += headroom; /* advance address leaving hole at front of pkt */ > > + page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (unlikely(!page)) > > + return -ENOMEM; > > + buf = (char *)page_address(page); > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + } > > } else { > > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > > return -ENOMEM; > > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > > struct virtio_device *vdev = rq->vq->vdev; > > > > struct page_pool_params pp_params = { > > - .order = 0, > > + .order = page_pool_frag ? 
SKB_FRAG_PAGE_ORDER : 0, > > .pool_size = rq->vq->num_max, > > .nid = dev_to_node(vdev->dev.parent), > > .dev = vdev->dev.parent, > > .offset = 0, > > }; > > > > + if (page_pool_frag) > > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > > + > > rq->page_pool = page_pool_create(&pp_params); > > if (IS_ERR(rq->page_pool)) { > > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", > > -- > > 2.31.1 >
On Mon, May 29, 2023 at 9:33 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2023/5/26 13:46, Liang Chen wrote: > > ... > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index 99c0ca0c1781..ac40b8c66c59 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); > > module_param(napi_tx, bool, 0644); > > > > static bool page_pool_enabled; > > +static bool page_pool_frag; > > module_param(page_pool_enabled, bool, 0400); > > +module_param(page_pool_frag, bool, 0400); > > The patchset below unifies frag and non-frag pages under the > page_pool_alloc_frag() API; perhaps it would simplify the > driver's support of page pool. > > https://patchwork.kernel.org/project/netdevbpf/cover/20230526092616.40355-1-linyunsheng@huawei.com/ > Thanks for the information and the work to make driver support easy. I will rebase accordingly after it lands. > > > > ... > > > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > > */ > > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > > if (rq->page_pool) { > > - struct page *page; > > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { > > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, > > + &pp_frag_offset, len + room))) > > + return -ENOMEM; > > + buf = (char *)page_address(rq->page_pool->frag_page) + > > + pp_frag_offset; > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + hole = (PAGE_SIZE << rq->page_pool->p.order) > > + - rq->page_pool->frag_offset; > > + if (hole < len + room) { > > + if (!headroom) > > + len += hole; > > + rq->page_pool->frag_offset += hole; > > Is there any reason why the driver needs to be aware of page_pool->frag_offset? > Won't page_pool_dev_alloc_frag() drain the last page for you when > page_pool_dev_alloc_frag() is called with size being 'len + room' later?
> One case I can think of needing this is to have an accurate truesize report > for skb, but I am not sure it matters that much as 'struct page_frag_cache' > and 'page_frag' implementations both have a similar problem. > Yeah, as you pointed out, page_pool_dev_alloc_frag will drain the page itself, and so does skb_page_frag_refill. This is trying to keep the logic consistent with the non-page-pool case, where the hole is skipped and included in the buffer len. > > + } > > + } else { > > + struct page *page; > > > > - page = page_pool_dev_alloc_pages(rq->page_pool); > > - if (unlikely(!page)) > > - return -ENOMEM; > > - buf = (char *)page_address(page); > > - buf += headroom; /* advance address leaving hole at front of pkt */ > > + page = page_pool_dev_alloc_pages(rq->page_pool); > > + if (unlikely(!page)) > > + return -ENOMEM; > > + buf = (char *)page_address(page); > > + buf += headroom; /* advance address leaving hole at front of pkt */ > > + } > > } else { > > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > > return -ENOMEM; > > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) > > struct virtio_device *vdev = rq->vq->vdev; > > > > struct page_pool_params pp_params = { > > - .order = 0, > > + .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0, > > .pool_size = rq->vq->num_max, > > If it is using order SKB_FRAG_PAGE_ORDER pages, perhaps pool_size does > not have to be rq->vq->num_max? Even for order 0 pages, perhaps the > pool_size does not need to be as big as rq->vq->num_max? > Thanks for pointing this out! pool_size will be lowered to a more appropriate value in v2. > > .nid = dev_to_node(vdev->dev.parent), > > .dev = vdev->dev.parent, > > .offset = 0, > > }; > > > > + if (page_pool_frag) > > + pp_params.flags |= PP_FLAG_PAGE_FRAG; > > + > > rq->page_pool = page_pool_create(&pp_params); > > if (IS_ERR(rq->page_pool)) { > > dev_warn(&vdev->dev, "page pool creation failed: %ld\n", > >
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 99c0ca0c1781..ac40b8c66c59 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -32,7 +32,9 @@ module_param(gso, bool, 0444); module_param(napi_tx, bool, 0644); static bool page_pool_enabled; +static bool page_pool_frag; module_param(page_pool_enabled, bool, 0400); +module_param(page_pool_frag, bool, 0400); /* FIXME: MTU in config. */ #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, struct page *p, int offset, int page_off, - unsigned int *len) + unsigned int *len, + unsigned int *pp_frag_offset) { int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page; + unsigned int pp_frag_offset_val; if (page_off + *len + tailroom > PAGE_SIZE) return NULL; if (rq->page_pool) - page = page_pool_dev_alloc_pages(rq->page_pool); + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset, + PAGE_SIZE); + else + page = page_pool_dev_alloc_pages(rq->page_pool); else page = alloc_page(GFP_ATOMIC); if (!page) return NULL; - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); + pp_frag_offset_val = pp_frag_offset ? 
*pp_frag_offset : 0; + + memcpy(page_address(page) + page_off + pp_frag_offset_val, + page_address(p) + offset, *len); page_off += *len; while (--*num_buf) { @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, goto err_buf; } - memcpy(page_address(page) + page_off, + memcpy(page_address(page) + page_off + pp_frag_offset_val, page_address(p) + off, buflen); page_off += buflen; virtnet_put_page(rq, p); @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); xdp_page = xdp_linearize_page(rq, &num_buf, page, offset, header_offset, - &tlen); + &tlen, NULL); if (!xdp_page) goto err_xdp; @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, unsigned int headroom = mergeable_ctx_to_headroom(ctx); struct page *xdp_page; unsigned int xdp_room; + unsigned int page_frag_offset = 0; /* Transient failure which in theory could occur if * in-flight packets from before XDP was enabled reach @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, xdp_page = xdp_linearize_page(rq, num_buf, *page, offset, VIRTIO_XDP_HEADROOM, - len); + len, + &page_frag_offset); if (!xdp_page) return NULL; } else { @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, return NULL; if (rq->page_pool) - xdp_page = page_pool_dev_alloc_pages(rq->page_pool); + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) + xdp_page = page_pool_dev_alloc_frag(rq->page_pool, + &page_frag_offset, PAGE_SIZE); + else + xdp_page = page_pool_dev_alloc_pages(rq->page_pool); else xdp_page = alloc_page(GFP_ATOMIC); + if (!xdp_page) return NULL; - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, - page_address(*page) + offset, *len); + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM + + page_frag_offset, page_address(*page) + offset, *len); } *frame_sz = PAGE_SIZE; @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct 
virtnet_info *vi, *page = xdp_page; - return page_address(*page) + VIRTIO_XDP_HEADROOM; + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset; } static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, void *ctx; int err; unsigned int len, hole; + unsigned int pp_frag_offset; /* Extra tailroom is needed to satisfy XDP's assumption. This * means rx frags coalescing won't work, but consider we've @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, */ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); if (rq->page_pool) { - struct page *page; + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) { + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool, + &pp_frag_offset, len + room))) + return -ENOMEM; + buf = (char *)page_address(rq->page_pool->frag_page) + + pp_frag_offset; + buf += headroom; /* advance address leaving hole at front of pkt */ + hole = (PAGE_SIZE << rq->page_pool->p.order) + - rq->page_pool->frag_offset; + if (hole < len + room) { + if (!headroom) + len += hole; + rq->page_pool->frag_offset += hole; + } + } else { + struct page *page; - page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!page)) - return -ENOMEM; - buf = (char *)page_address(page); - buf += headroom; /* advance address leaving hole at front of pkt */ + page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!page)) + return -ENOMEM; + buf = (char *)page_address(page); + buf += headroom; /* advance address leaving hole at front of pkt */ + } } else { if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) return -ENOMEM; @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq) struct virtio_device *vdev = rq->vq->vdev; struct page_pool_params pp_params = { - .order = 0, + .order = page_pool_frag ? 
SKB_FRAG_PAGE_ORDER : 0, .pool_size = rq->vq->num_max, .nid = dev_to_node(vdev->dev.parent), .dev = vdev->dev.parent, .offset = 0, }; + if (page_pool_frag) + pp_params.flags |= PP_FLAG_PAGE_FRAG; + rq->page_pool = page_pool_create(&pp_params); if (IS_ERR(rq->page_pool)) { dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
To further enhance performance, implement page pool fragmentation support and introduce a module parameter to enable or disable it. In single-core VM testing environments, there is an additional performance gain observed in the normal path compared to the one-packet-per-page approach. Upstream codebase: 47.5 Gbits/sec Upstream codebase with page pool: 50.2 Gbits/sec Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec There is also some performance gain for XDP cpumap. Upstream codebase: 1.38 Gbits/sec Upstream codebase with page pool: 9.74 Gbits/sec Upstream codebase with page pool fragmentation: 10.3 Gbits/sec Signed-off-by: Liang Chen <liangchen.linux@gmail.com> --- drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 17 deletions(-)