diff mbox series

[vhost,4/6] virtio_net: big mode support premapped

Message ID 20240411025127.51945-5-xuanzhuo@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series virtio_net: rx enable premapped mode by default | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 942 this patch: 948
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 953 this patch: 953
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 953 this patch: 955
netdev/checkpatch warning WARNING: line length of 89 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo April 11, 2024, 2:51 a.m. UTC
In big mode, pre-mapping DMA is beneficial because if the pages are not
used, we can reuse them without needing to unmap and remap.

We require space to store the DMA address. I use page.dma_addr, from the
pp structure inside struct page, to store the DMA address.

Every page retrieved from get_a_page() is mapped, and its DMA address is
stored in page.dma_addr. When a page is returned to the chain, we check
the DMA status; if it is not mapped (potentially having been unmapped),
we remap it before returning it to the chain.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
 1 file changed, 81 insertions(+), 17 deletions(-)

Comments

kernel test robot April 11, 2024, 4:34 p.m. UTC | #1
Hi Xuan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.9-rc3 next-20240411]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-introduce-dma-map-api-for-page/20240411-105318
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
patch link:    https://lore.kernel.org/r/20240411025127.51945-5-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH vhost 4/6] virtio_net: big mode support premapped
config: i386-randconfig-016-20240411 (https://download.01.org/0day-ci/archive/20240412/202404120044.VKtjHMzy-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240412/202404120044.VKtjHMzy-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404120044.VKtjHMzy-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/virtio_net.c:449:21: warning: implicit conversion from 'dma_addr_t' (aka 'unsigned long long') to 'unsigned long' changes value from 18446744073709551615 to 4294967295 [-Wconstant-conversion]
     449 |         page_dma_addr(p) = DMA_MAPPING_ERROR;
         |                          ~ ^~~~~~~~~~~~~~~~~
   include/linux/dma-mapping.h:75:29: note: expanded from macro 'DMA_MAPPING_ERROR'
      75 | #define DMA_MAPPING_ERROR               (~(dma_addr_t)0)
         |                                          ^~~~~~~~~~~~~~
>> drivers/net/virtio_net.c:485:26: warning: result of comparison of constant 18446744073709551615 with expression of type 'unsigned long' is always false [-Wtautological-constant-out-of-range-compare]
     485 |         if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
         |             ~~~~~~~~~~~~~~~~~~~ ^  ~~~~~~~~~~~~~~~~~
   2 warnings generated.


vim +449 drivers/net/virtio_net.c

   443	
   444	static void page_chain_unmap(struct receive_queue *rq, struct page *p)
   445	{
   446		virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
   447					       DMA_FROM_DEVICE, 0);
   448	
 > 449		page_dma_addr(p) = DMA_MAPPING_ERROR;
   450	}
   451	
   452	static int page_chain_map(struct receive_queue *rq, struct page *p)
   453	{
   454		dma_addr_t addr;
   455	
   456		addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
   457		if (virtqueue_dma_mapping_error(rq->vq, addr))
   458			return -ENOMEM;
   459	
   460		page_dma_addr(p) = addr;
   461		return 0;
   462	}
   463	
   464	static void page_chain_release(struct receive_queue *rq)
   465	{
   466		struct page *p, *n;
   467	
   468		for (p = rq->pages; p; p = n) {
   469			n = page_chain_next(p);
   470	
   471			page_chain_unmap(rq, p);
   472			__free_pages(p, 0);
   473		}
   474	
   475		rq->pages = NULL;
   476	}
   477	
   478	/*
   479	 * put the whole most recent used list in the beginning for reuse
   480	 */
   481	static void give_pages(struct receive_queue *rq, struct page *page)
   482	{
   483		struct page *end;
   484	
 > 485		if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
   486			if (page_chain_map(rq, page)) {
   487				__free_pages(page, 0);
   488				return;
   489			}
   490		}
   491	
   492		/* Find end of list, sew whole thing into vi->rq.pages. */
   493		for (end = page; page_chain_next(end); end = page_chain_next(end));
   494	
   495		page_chain_add(end, rq->pages);
   496		rq->pages = page;
   497	}
   498
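
To see concretely what the -Wconstant-conversion and tautological-compare pair above is flagging, here is a stand-alone userspace sketch; uint32_t stands in for the 32-bit unsigned long that backs page->dma_addr on i386, and the local dma_addr_t typedef models CONFIG_ARCH_DMA_ADDR_T_64BIT:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t dma_addr_t;               /* 64-bit dma_addr_t, as on i386 + PAE */
#define DMA_MAPPING_ERROR (~(dma_addr_t)0) /* 0xffffffffffffffff */

int main(void)
{
	/* Truncates to 0xffffffff; the compiler warns here, just like the
	 * report above does for page_dma_addr(p) = DMA_MAPPING_ERROR. */
	uint32_t dma_addr = DMA_MAPPING_ERROR;

	printf("stored value: 0x%x\n", (unsigned int)dma_addr);
	/* dma_addr is promoted back to 64 bits for the comparison, so it can
	 * never equal the untruncated sentinel: always false. */
	printf("matches DMA_MAPPING_ERROR: %d\n",
	       dma_addr == DMA_MAPPING_ERROR);
	return 0;
}

The gcc report below and the smatch report further down in the thread are the same width mismatch, seen from the store side and the compare side respectively.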
kernel test robot April 11, 2024, 8:11 p.m. UTC | #2
Hi Xuan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.9-rc3 next-20240411]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-introduce-dma-map-api-for-page/20240411-105318
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
patch link:    https://lore.kernel.org/r/20240411025127.51945-5-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH vhost 4/6] virtio_net: big mode support premapped
config: i386-randconfig-062-20240411 (https://download.01.org/0day-ci/archive/20240412/202404120417.VUAT9H5b-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240412/202404120417.VUAT9H5b-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404120417.VUAT9H5b-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from include/linux/skbuff.h:28,
                    from include/net/net_namespace.h:43,
                    from include/linux/netdevice.h:38,
                    from drivers/net/virtio_net.c:7:
   drivers/net/virtio_net.c: In function 'page_chain_unmap':
>> include/linux/dma-mapping.h:75:41: warning: conversion from 'long long unsigned int' to 'long unsigned int' changes value from '18446744073709551615' to '4294967295' [-Woverflow]
      75 | #define DMA_MAPPING_ERROR               (~(dma_addr_t)0)
         |                                         ^
   drivers/net/virtio_net.c:449:28: note: in expansion of macro 'DMA_MAPPING_ERROR'
     449 |         page_dma_addr(p) = DMA_MAPPING_ERROR;
         |                            ^~~~~~~~~~~~~~~~~


vim +75 include/linux/dma-mapping.h

b2fb366425ceb8 Mitchel Humpherys 2017-01-06  64  
eba304c6861613 Christoph Hellwig 2020-09-11  65  /*
eba304c6861613 Christoph Hellwig 2020-09-11  66   * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
eba304c6861613 Christoph Hellwig 2020-09-11  67   * be given to a device to use as a DMA source or target.  It is specific to a
eba304c6861613 Christoph Hellwig 2020-09-11  68   * given device and there may be a translation between the CPU physical address
eba304c6861613 Christoph Hellwig 2020-09-11  69   * space and the bus address space.
eba304c6861613 Christoph Hellwig 2020-09-11  70   *
eba304c6861613 Christoph Hellwig 2020-09-11  71   * DMA_MAPPING_ERROR is the magic error code if a mapping failed.  It should not
eba304c6861613 Christoph Hellwig 2020-09-11  72   * be used directly in drivers, but checked for using dma_mapping_error()
eba304c6861613 Christoph Hellwig 2020-09-11  73   * instead.
eba304c6861613 Christoph Hellwig 2020-09-11  74   */
42ee3cae0ed38b Christoph Hellwig 2018-11-21 @75  #define DMA_MAPPING_ERROR		(~(dma_addr_t)0)
42ee3cae0ed38b Christoph Hellwig 2018-11-21  76
Dan Carpenter April 14, 2024, 9:48 a.m. UTC | #3
Hi Xuan,

kernel test robot noticed the following build warnings:

[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/virtio_ring-introduce-dma-map-api-for-page/20240411-105318
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
patch link:    https://lore.kernel.org/r/20240411025127.51945-5-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH vhost 4/6] virtio_net: big mode support premapped
config: i386-randconfig-141-20240414 (https://download.01.org/0day-ci/archive/20240414/202404141343.iPhKo7zd-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202404141343.iPhKo7zd-lkp@intel.com/

New smatch warnings:
drivers/net/virtio_net.c:485 give_pages() warn: impossible condition '((page->dma_addr) == (~0)) => (0-u32max == u64max)'

vim +485 drivers/net/virtio_net.c

e9d7417b97f420 Jason Wang      2012-12-07  481  static void give_pages(struct receive_queue *rq, struct page *page)
0a888fd1f6320d Mark McLoughlin 2008-11-16  482  {
9ab86bbcf8be75 Shirley Ma      2010-01-29  483  	struct page *end;
0a888fd1f6320d Mark McLoughlin 2008-11-16  484  
59e4bcf761eeba Xuan Zhuo       2024-04-11 @485  	if (page_dma_addr(page) == DMA_MAPPING_ERROR) {

(struct page)->dma_addr is unsigned long but DMA_MAPPING_ERROR is
dma_addr_t.

59e4bcf761eeba Xuan Zhuo       2024-04-11  486  		if (page_chain_map(rq, page)) {
59e4bcf761eeba Xuan Zhuo       2024-04-11  487  			__free_pages(page, 0);
59e4bcf761eeba Xuan Zhuo       2024-04-11  488  			return;
59e4bcf761eeba Xuan Zhuo       2024-04-11  489  		}
59e4bcf761eeba Xuan Zhuo       2024-04-11  490  	}
59e4bcf761eeba Xuan Zhuo       2024-04-11  491  
e9d7417b97f420 Jason Wang      2012-12-07  492  	/* Find end of list, sew whole thing into vi->rq.pages. */
590f79cf558cb4 Xuan Zhuo       2024-04-11  493  	for (end = page; page_chain_next(end); end = page_chain_next(end));
590f79cf558cb4 Xuan Zhuo       2024-04-11  494  
590f79cf558cb4 Xuan Zhuo       2024-04-11  495  	page_chain_add(end, rq->pages);
e9d7417b97f420 Jason Wang      2012-12-07  496  	rq->pages = page;
0a888fd1f6320d Mark McLoughlin 2008-11-16  497  }
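
One possible direction, sketched here only for illustration and not necessarily the fix the author ends up with: keep the "not mapped" sentinel in the same type as the storage field, so the store and the comparison that smatch flags are well defined even when dma_addr_t is wider than unsigned long. This does not answer the separate question of whether a 64-bit DMA address fits into the unsigned long field at all on such configs.

#include <linux/mm_types.h>
#include <linux/types.h>

/* Hypothetical helpers, illustration only.  The sentinel has the width of
 * page->dma_addr (unsigned long), so it survives the store unchanged and
 * the comparison below is not always false. */
#define PAGE_CHAIN_ADDR_UNMAPPED	(~0UL)
#define page_dma_addr(p)		((p)->dma_addr)

static inline bool page_chain_is_mapped(struct page *p)
{
	return page_dma_addr(p) != PAGE_CHAIN_ADDR_UNMAPPED;
}

static inline void page_chain_mark_unmapped(struct page *p)
{
	page_dma_addr(p) = PAGE_CHAIN_ADDR_UNMAPPED;
}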
Jason Wang April 18, 2024, 6:25 a.m. UTC | #4
On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> In big mode, pre-mapping DMA is beneficial because if the pages are not
> used, we can reuse them without needing to unmap and remap.
>
> We require space to store the DMA address. I use the page.dma_addr to
> store the DMA address from the pp structure inside the page.
>
> Every page retrieved from get_a_page() is mapped, and its DMA address is
> stored in page.dma_addr. When a page is returned to the chain, we check
> the DMA status; if it is not mapped (potentially having been unmapped),
> we remap it before returning it to the chain.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 81 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4446fb54de6d..7ea7e9bcd5d7 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
>
>  #define page_chain_next(p)     ((struct page *)((p)->pp))
>  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> +#define page_dma_addr(p)       ((p)->dma_addr)
>
>  /* RX packet size EWMA. The average packet size is used to determine the packet
>   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
>         return (struct virtio_net_common_hdr *)skb->cb;
>  }
>
> +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> +{
> +       sg->dma_address = addr;
> +       sg->length = len;
> +}
> +
> +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> +{
> +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> +                                      DMA_FROM_DEVICE, 0);
> +
> +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> +}
> +
> +static int page_chain_map(struct receive_queue *rq, struct page *p)
> +{
> +       dma_addr_t addr;
> +
> +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> +               return -ENOMEM;
> +
> +       page_dma_addr(p) = addr;
> +       return 0;
> +}
> +
> +static void page_chain_release(struct receive_queue *rq)
> +{
> +       struct page *p, *n;
> +
> +       for (p = rq->pages; p; p = n) {
> +               n = page_chain_next(p);
> +
> +               page_chain_unmap(rq, p);
> +               __free_pages(p, 0);
> +       }
> +
> +       rq->pages = NULL;
> +}
> +
>  /*
>   * put the whole most recent used list in the beginning for reuse
>   */
> @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
>  {
>         struct page *end;
>
> +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {

This looks strange, the map should be done during allocation. Under
which condition could we hit this?

> +               if (page_chain_map(rq, page)) {
> +                       __free_pages(page, 0);
> +                       return;
> +               }
> +       }
> +
>         /* Find end of list, sew whole thing into vi->rq.pages. */
>         for (end = page; page_chain_next(end); end = page_chain_next(end));
>
> @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
>                 rq->pages = page_chain_next(p);
>                 /* clear chain here, it is used to chain pages */
>                 page_chain_add(p, NULL);
> -       } else
> +       } else {
>                 p = alloc_page(gfp_mask);
> +
> +               if (page_chain_map(rq, p)) {
> +                       __free_pages(p, 0);
> +                       return NULL;
> +               }
> +       }
> +
>         return p;
>  }
>
> @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>                         return NULL;
>
>                 page = page_chain_next(page);
> -               if (page)
> -                       give_pages(rq, page);
>                 goto ok;
>         }
>
> @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
>                 else
>                         page_to_free = page;
> +               page = NULL;
>                 goto ok;
>         }
>
> @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>         BUG_ON(offset >= PAGE_SIZE);
>         while (len) {
>                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> +
> +               /* unmap the page before using it. */
> +               if (!offset)
> +                       page_chain_unmap(rq, page);
> +

This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?

>                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
>                                 frag_size, truesize);
>                 len -= frag_size;
> @@ -664,15 +723,15 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>                 offset = 0;
>         }
>
> -       if (page)
> -               give_pages(rq, page);
> -
>  ok:
>         hdr = skb_vnet_common_hdr(skb);
>         memcpy(hdr, hdr_p, hdr_len);
>         if (page_to_free)
>                 put_page(page_to_free);
>
> +       if (page)
> +               give_pages(rq, page);
> +
>         return skb;
>  }
>
> @@ -823,7 +882,8 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
>
>         rq = &vi->rq[i];
>
> -       if (rq->do_dma)
> +       /* Skip the unmap for big mode. */
> +       if (!vi->big_packets || vi->mergeable_rx_bufs)
>                 virtnet_rq_unmap(rq, buf, 0);
>
>         virtnet_rq_free_buf(vi, rq, buf);
> @@ -1346,8 +1406,12 @@ static struct sk_buff *receive_big(struct net_device *dev,
>                                    struct virtnet_rq_stats *stats)
>  {
>         struct page *page = buf;
> -       struct sk_buff *skb =
> -               page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
> +       struct sk_buff *skb;
> +
> +       /* Unmap first page. The follow code may read this page. */
> +       page_chain_unmap(rq, page);

And probably here as well.

Thanks
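
A minimal sketch of the sync-for-cpu idea raised in the review above, assuming the virtqueue DMA sync helpers from the premapped API are what is meant (the exact helper name and signature here are an assumption, not taken from this series):

/* Hypothetical helper, not part of the posted patch: make the
 * device-written bytes of a still-mapped chain page visible to the CPU
 * without tearing the mapping down, so the page can go straight back to
 * rq->pages and give_pages() never sees an unmapped page. */
static void page_chain_sync_for_cpu(struct receive_queue *rq, struct page *p,
				    u32 offset, u32 len)
{
	virtqueue_dma_sync_single_range_for_cpu(rq->vq, page_dma_addr(p),
						offset, len, DMA_FROM_DEVICE);
}

Pages that really leave virtio-net (attached to an skb via skb_add_rx_frag() and later freed) would still need the full page_chain_unmap(), which is the tension the rest of the thread works through.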
Xuan Zhuo April 18, 2024, 8:29 a.m. UTC | #5
On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > used, we can reuse them without needing to unmap and remap.
> >
> > We require space to store the DMA address. I use the page.dma_addr to
> > store the DMA address from the pp structure inside the page.
> >
> > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > stored in page.dma_addr. When a page is returned to the chain, we check
> > the DMA status; if it is not mapped (potentially having been unmapped),
> > we remap it before returning it to the chain.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> >  1 file changed, 81 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> >
> >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > +#define page_dma_addr(p)       ((p)->dma_addr)
> >
> >  /* RX packet size EWMA. The average packet size is used to determine the packet
> >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> >         return (struct virtio_net_common_hdr *)skb->cb;
> >  }
> >
> > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > +{
> > +       sg->dma_address = addr;
> > +       sg->length = len;
> > +}
> > +
> > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > +{
> > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > +                                      DMA_FROM_DEVICE, 0);
> > +
> > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > +}
> > +
> > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > +{
> > +       dma_addr_t addr;
> > +
> > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > +               return -ENOMEM;
> > +
> > +       page_dma_addr(p) = addr;
> > +       return 0;
> > +}
> > +
> > +static void page_chain_release(struct receive_queue *rq)
> > +{
> > +       struct page *p, *n;
> > +
> > +       for (p = rq->pages; p; p = n) {
> > +               n = page_chain_next(p);
> > +
> > +               page_chain_unmap(rq, p);
> > +               __free_pages(p, 0);
> > +       }
> > +
> > +       rq->pages = NULL;
> > +}
> > +
> >  /*
> >   * put the whole most recent used list in the beginning for reuse
> >   */
> > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> >  {
> >         struct page *end;
> >
> > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
>
> This looks strange, the map should be done during allocation. Under
> which condition could we hit this?

The first page is unmapped before we call page_to_skb().
The page can be put back into the chain in case of failure.


>
> > +               if (page_chain_map(rq, page)) {
> > +                       __free_pages(page, 0);
> > +                       return;
> > +               }
> > +       }
> > +
> >         /* Find end of list, sew whole thing into vi->rq.pages. */
> >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> >
> > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> >                 rq->pages = page_chain_next(p);
> >                 /* clear chain here, it is used to chain pages */
> >                 page_chain_add(p, NULL);
> > -       } else
> > +       } else {
> >                 p = alloc_page(gfp_mask);
> > +
> > +               if (page_chain_map(rq, p)) {
> > +                       __free_pages(p, 0);
> > +                       return NULL;
> > +               }
> > +       }
> > +
> >         return p;
> >  }
> >
> > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >                         return NULL;
> >
> >                 page = page_chain_next(page);
> > -               if (page)
> > -                       give_pages(rq, page);
> >                 goto ok;
> >         }
> >
> > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> >                 else
> >                         page_to_free = page;
> > +               page = NULL;
> >                 goto ok;
> >         }
> >
> > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >         BUG_ON(offset >= PAGE_SIZE);
> >         while (len) {
> >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > +
> > +               /* unmap the page before using it. */
> > +               if (!offset)
> > +                       page_chain_unmap(rq, page);
> > +
>
> This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?

I think we do not need that, because the unmap API does it.
We do not work with DMA_SKIP_SYNC.

Thanks.


>
> >                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
> >                                 frag_size, truesize);
> >                 len -= frag_size;
> > @@ -664,15 +723,15 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >                 offset = 0;
> >         }
> >
> > -       if (page)
> > -               give_pages(rq, page);
> > -
> >  ok:
> >         hdr = skb_vnet_common_hdr(skb);
> >         memcpy(hdr, hdr_p, hdr_len);
> >         if (page_to_free)
> >                 put_page(page_to_free);
> >
> > +       if (page)
> > +               give_pages(rq, page);
> > +
> >         return skb;
> >  }
> >
> > @@ -823,7 +882,8 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
> >
> >         rq = &vi->rq[i];
> >
> > -       if (rq->do_dma)
> > +       /* Skip the unmap for big mode. */
> > +       if (!vi->big_packets || vi->mergeable_rx_bufs)
> >                 virtnet_rq_unmap(rq, buf, 0);
> >
> >         virtnet_rq_free_buf(vi, rq, buf);
> > @@ -1346,8 +1406,12 @@ static struct sk_buff *receive_big(struct net_device *dev,
> >                                    struct virtnet_rq_stats *stats)
> >  {
> >         struct page *page = buf;
> > -       struct sk_buff *skb =
> > -               page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
> > +       struct sk_buff *skb;
> > +
> > +       /* Unmap first page. The follow code may read this page. */
> > +       page_chain_unmap(rq, page);
>
> And probably here as well.
>
> Thanks
>
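
Reading "DMA_SKIP_SYNC" above as the DMA_ATTR_SKIP_CPU_SYNC mapping attribute, the point can be spelled out as a short sketch (the second call is shown only for contrast; the series always passes attrs == 0):

/* attrs == 0: the unmap path also syncs the buffer for the CPU, so no
 * separate sync call is needed before reading the page.  This is what
 * page_chain_unmap() in the patch relies on. */
virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
			       DMA_FROM_DEVICE, 0);

/* Only with DMA_ATTR_SKIP_CPU_SYNC would the caller be responsible for an
 * explicit sync_for_cpu first (not what this series does). */
virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
			       DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);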
Jason Wang April 19, 2024, 12:43 a.m. UTC | #6
On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > used, we can reuse them without needing to unmap and remap.
> > >
> > > We require space to store the DMA address. I use the page.dma_addr to
> > > store the DMA address from the pp structure inside the page.
> > >
> > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > we remap it before returning it to the chain.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > >
> > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > >
> > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > >         return (struct virtio_net_common_hdr *)skb->cb;
> > >  }
> > >
> > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > +{
> > > +       sg->dma_address = addr;
> > > +       sg->length = len;
> > > +}
> > > +
> > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > +{
> > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > +                                      DMA_FROM_DEVICE, 0);
> > > +
> > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > +}
> > > +
> > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > +{
> > > +       dma_addr_t addr;
> > > +
> > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > +               return -ENOMEM;
> > > +
> > > +       page_dma_addr(p) = addr;
> > > +       return 0;
> > > +}
> > > +
> > > +static void page_chain_release(struct receive_queue *rq)
> > > +{
> > > +       struct page *p, *n;
> > > +
> > > +       for (p = rq->pages; p; p = n) {
> > > +               n = page_chain_next(p);
> > > +
> > > +               page_chain_unmap(rq, p);
> > > +               __free_pages(p, 0);
> > > +       }
> > > +
> > > +       rq->pages = NULL;
> > > +}
> > > +
> > >  /*
> > >   * put the whole most recent used list in the beginning for reuse
> > >   */
> > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > >  {
> > >         struct page *end;
> > >
> > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> >
> > This looks strange, the map should be done during allocation. Under
> > which condition could we hit this?
>
> This first page is umapped before we call page_to_skb().
> The page can be put back to the link in case of failure.

See below.

>
>
> >
> > > +               if (page_chain_map(rq, page)) {
> > > +                       __free_pages(page, 0);
> > > +                       return;
> > > +               }
> > > +       }
> > > +
> > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > >
> > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > >                 rq->pages = page_chain_next(p);
> > >                 /* clear chain here, it is used to chain pages */
> > >                 page_chain_add(p, NULL);
> > > -       } else
> > > +       } else {
> > >                 p = alloc_page(gfp_mask);
> > > +
> > > +               if (page_chain_map(rq, p)) {
> > > +                       __free_pages(p, 0);
> > > +                       return NULL;
> > > +               }
> > > +       }
> > > +
> > >         return p;
> > >  }
> > >
> > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > >                         return NULL;
> > >
> > >                 page = page_chain_next(page);
> > > -               if (page)
> > > -                       give_pages(rq, page);
> > >                 goto ok;
> > >         }
> > >
> > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > >                 else
> > >                         page_to_free = page;
> > > +               page = NULL;
> > >                 goto ok;
> > >         }
> > >
> > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > >         BUG_ON(offset >= PAGE_SIZE);
> > >         while (len) {
> > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > +
> > > +               /* unmap the page before using it. */
> > > +               if (!offset)
> > > +                       page_chain_unmap(rq, page);
> > > +
> >
> > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
>
> I think we do not need that. Because the umap api does it.
> We do not work with DMA_SKIP_SYNC;

Well, the problem is that unmap is too heavyweight, and it undermines the
effort of avoiding map/unmaps as much as possible.

For example, in most cases the DMA sync is just a nop. And such an unmap()
causes the strange code in give_pages() that we discussed above, no?

Thanks
Xuan Zhuo April 19, 2024, 4:21 a.m. UTC | #7
On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > used, we can reuse them without needing to unmap and remap.
> > > >
> > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > store the DMA address from the pp structure inside the page.
> > > >
> > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > we remap it before returning it to the chain.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > >
> > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > >
> > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > >  }
> > > >
> > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > +{
> > > > +       sg->dma_address = addr;
> > > > +       sg->length = len;
> > > > +}
> > > > +
> > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > +{
> > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > +                                      DMA_FROM_DEVICE, 0);
> > > > +
> > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > +}
> > > > +
> > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > +{
> > > > +       dma_addr_t addr;
> > > > +
> > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > +               return -ENOMEM;
> > > > +
> > > > +       page_dma_addr(p) = addr;
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +static void page_chain_release(struct receive_queue *rq)
> > > > +{
> > > > +       struct page *p, *n;
> > > > +
> > > > +       for (p = rq->pages; p; p = n) {
> > > > +               n = page_chain_next(p);
> > > > +
> > > > +               page_chain_unmap(rq, p);
> > > > +               __free_pages(p, 0);
> > > > +       }
> > > > +
> > > > +       rq->pages = NULL;
> > > > +}
> > > > +
> > > >  /*
> > > >   * put the whole most recent used list in the beginning for reuse
> > > >   */
> > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > >  {
> > > >         struct page *end;
> > > >
> > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > >
> > > This looks strange, the map should be done during allocation. Under
> > > which condition could we hit this?
> >
> > This first page is umapped before we call page_to_skb().
> > The page can be put back to the link in case of failure.
>
> See below.
>
> >
> >
> > >
> > > > +               if (page_chain_map(rq, page)) {
> > > > +                       __free_pages(page, 0);
> > > > +                       return;
> > > > +               }
> > > > +       }
> > > > +
> > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > >
> > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > >                 rq->pages = page_chain_next(p);
> > > >                 /* clear chain here, it is used to chain pages */
> > > >                 page_chain_add(p, NULL);
> > > > -       } else
> > > > +       } else {
> > > >                 p = alloc_page(gfp_mask);
> > > > +
> > > > +               if (page_chain_map(rq, p)) {
> > > > +                       __free_pages(p, 0);
> > > > +                       return NULL;
> > > > +               }
> > > > +       }
> > > > +
> > > >         return p;
> > > >  }
> > > >
> > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > >                         return NULL;
> > > >
> > > >                 page = page_chain_next(page);
> > > > -               if (page)
> > > > -                       give_pages(rq, page);
> > > >                 goto ok;
> > > >         }
> > > >
> > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > >                 else
> > > >                         page_to_free = page;
> > > > +               page = NULL;
> > > >                 goto ok;
> > > >         }
> > > >
> > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > >         BUG_ON(offset >= PAGE_SIZE);
> > > >         while (len) {
> > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > +
> > > > +               /* unmap the page before using it. */
> > > > +               if (!offset)
> > > > +                       page_chain_unmap(rq, page);
> > > > +
> > >
> > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> >
> > I think we do not need that. Because the umap api does it.
> > We do not work with DMA_SKIP_SYNC;
>
> Well, the problem is unmap is too heavyweight and it reduces the
> effort of trying to avoid map/umaps as much as possible.
>
> For example, for most of the case DMA sync is just a nop. And such
> unmap() cause strange code in give_pages() as we discuss above?

Yes, you are right. For the first page, we just need to sync for the CPU,
and we do not need to check the DMA status.
But here (in page_to_skb()), we need to call unmap, because this page is
put into the skb.

Thanks.


>
> Thanks
>
Jason Wang April 19, 2024, 5:46 a.m. UTC | #8
On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > used, we can reuse them without needing to unmap and remap.
> > > > >
> > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > store the DMA address from the pp structure inside the page.
> > > > >
> > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > we remap it before returning it to the chain.
> > > > >
> > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > >
> > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > >
> > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > >  }
> > > > >
> > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > +{
> > > > > +       sg->dma_address = addr;
> > > > > +       sg->length = len;
> > > > > +}
> > > > > +
> > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > +{
> > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > +
> > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > +}
> > > > > +
> > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > +{
> > > > > +       dma_addr_t addr;
> > > > > +
> > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > +               return -ENOMEM;
> > > > > +
> > > > > +       page_dma_addr(p) = addr;
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > +{
> > > > > +       struct page *p, *n;
> > > > > +
> > > > > +       for (p = rq->pages; p; p = n) {
> > > > > +               n = page_chain_next(p);
> > > > > +
> > > > > +               page_chain_unmap(rq, p);
> > > > > +               __free_pages(p, 0);
> > > > > +       }
> > > > > +
> > > > > +       rq->pages = NULL;
> > > > > +}
> > > > > +
> > > > >  /*
> > > > >   * put the whole most recent used list in the beginning for reuse
> > > > >   */
> > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > >  {
> > > > >         struct page *end;
> > > > >
> > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > >
> > > > This looks strange, the map should be done during allocation. Under
> > > > which condition could we hit this?
> > >
> > > This first page is umapped before we call page_to_skb().
> > > The page can be put back to the link in case of failure.
> >
> > See below.
> >
> > >
> > >
> > > >
> > > > > +               if (page_chain_map(rq, page)) {
> > > > > +                       __free_pages(page, 0);
> > > > > +                       return;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > >
> > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > >                 rq->pages = page_chain_next(p);
> > > > >                 /* clear chain here, it is used to chain pages */
> > > > >                 page_chain_add(p, NULL);
> > > > > -       } else
> > > > > +       } else {
> > > > >                 p = alloc_page(gfp_mask);
> > > > > +
> > > > > +               if (page_chain_map(rq, p)) {
> > > > > +                       __free_pages(p, 0);
> > > > > +                       return NULL;
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > >         return p;
> > > > >  }
> > > > >
> > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > >                         return NULL;
> > > > >
> > > > >                 page = page_chain_next(page);
> > > > > -               if (page)
> > > > > -                       give_pages(rq, page);
> > > > >                 goto ok;
> > > > >         }
> > > > >
> > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > >                 else
> > > > >                         page_to_free = page;
> > > > > +               page = NULL;
> > > > >                 goto ok;
> > > > >         }
> > > > >
> > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > >         while (len) {
> > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > +
> > > > > +               /* unmap the page before using it. */
> > > > > +               if (!offset)
> > > > > +                       page_chain_unmap(rq, page);
> > > > > +
> > > >
> > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > >
> > > I think we do not need that. Because the umap api does it.
> > > We do not work with DMA_SKIP_SYNC;
> >
> > Well, the problem is unmap is too heavyweight and it reduces the
> > effort of trying to avoid map/umaps as much as possible.
> >
> > For example, for most of the case DMA sync is just a nop. And such
> > unmap() cause strange code in give_pages() as we discuss above?
>
> YES. You are right. For the first page, we just need to sync for cpu.
> And we do not need to check the dma status.
> But here (in page_to_skb), we need to call unmap, because this page is put
> to the skb.

Right, but the issue remains:

The only case where we may hit

        if (page_dma_addr(page) == DMA_MAPPING_ERROR)

is when the packet is smaller than GOOD_COPY_LEN.

So, if we sync_for_cpu() the head page, could we:

1) skip the unmap in receive_big()
2) do the sync_for_cpu() just before skb_put_data(), so the page could be
recycled to the pool without unmapping?

And I think we should do something similar for the mergeable case?

Btw, I found a misleading comment introduced by f80bd740cb7c9:

        /* copy small packet so we can reuse these pages */
        if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {

We're not copying but building the skb around the head page.

Thanks

>
> Thanks.
>
>
> >
> > Thanks
> >
>
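
A hedged sketch of the small-packet direction proposed in #8, with names borrowed loosely from page_to_skb() and the earlier hypothetical page_chain_sync_for_cpu() helper; this is a shape, not the posted code:

/* Shape only: for a packet below GOOD_COPY_LEN everything is copied into
 * a fresh skb, so the head page never leaves virtio-net.  Sync it for the
 * CPU right before the copy instead of unmapping it in receive_big(), and
 * hand the whole still-mapped chain back afterwards. */
if (len <= GOOD_COPY_LEN) {
	char *p = page_address(page) + offset;

	page_chain_sync_for_cpu(rq, page, offset, len);  /* assumed helper */
	skb_put_data(skb, p, len);

	give_pages(rq, page);      /* chain stays mapped, no remap needed */
	return skb;
}

With that, the "if (page_dma_addr(page) == DMA_MAPPING_ERROR)" fallback in give_pages() would have no caller left that hands back an unmapped page.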
Xuan Zhuo April 19, 2024, 7:03 a.m. UTC | #9
On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > >
> > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > store the DMA address from the pp structure inside the page.
> > > > > >
> > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > we remap it before returning it to the chain.
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > ---
> > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > >
> > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > >
> > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > >  }
> > > > > >
> > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > +{
> > > > > > +       sg->dma_address = addr;
> > > > > > +       sg->length = len;
> > > > > > +}
> > > > > > +
> > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > +{
> > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > +
> > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > +}
> > > > > > +
> > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > +{
> > > > > > +       dma_addr_t addr;
> > > > > > +
> > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > +               return -ENOMEM;
> > > > > > +
> > > > > > +       page_dma_addr(p) = addr;
> > > > > > +       return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > +{
> > > > > > +       struct page *p, *n;
> > > > > > +
> > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > +               n = page_chain_next(p);
> > > > > > +
> > > > > > +               page_chain_unmap(rq, p);
> > > > > > +               __free_pages(p, 0);
> > > > > > +       }
> > > > > > +
> > > > > > +       rq->pages = NULL;
> > > > > > +}
> > > > > > +
> > > > > >  /*
> > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > >   */
> > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > >  {
> > > > > >         struct page *end;
> > > > > >
> > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > >
> > > > > This looks strange, the map should be done during allocation. Under
> > > > > which condition could we hit this?
> > > >
> > > > This first page is umapped before we call page_to_skb().
> > > > The page can be put back to the link in case of failure.
> > >
> > > See below.
> > >
> > > >
> > > >
> > > > >
> > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > +                       __free_pages(page, 0);
> > > > > > +                       return;
> > > > > > +               }
> > > > > > +       }
> > > > > > +
> > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > >
> > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > >                 rq->pages = page_chain_next(p);
> > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > >                 page_chain_add(p, NULL);
> > > > > > -       } else
> > > > > > +       } else {
> > > > > >                 p = alloc_page(gfp_mask);
> > > > > > +
> > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > +                       __free_pages(p, 0);
> > > > > > +                       return NULL;
> > > > > > +               }
> > > > > > +       }
> > > > > > +
> > > > > >         return p;
> > > > > >  }
> > > > > >
> > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > >                         return NULL;
> > > > > >
> > > > > >                 page = page_chain_next(page);
> > > > > > -               if (page)
> > > > > > -                       give_pages(rq, page);
> > > > > >                 goto ok;
> > > > > >         }
> > > > > >
> > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > >                 else
> > > > > >                         page_to_free = page;
> > > > > > +               page = NULL;
> > > > > >                 goto ok;
> > > > > >         }
> > > > > >
> > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > >         while (len) {
> > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > +
> > > > > > +               /* unmap the page before using it. */
> > > > > > +               if (!offset)
> > > > > > +                       page_chain_unmap(rq, page);
> > > > > > +
> > > > >
> > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > >
> > > > I think we do not need that. Because the umap api does it.
> > > > We do not work with DMA_SKIP_SYNC;
> > >
> > > Well, the problem is unmap is too heavyweight and it reduces the
> > > effort of trying to avoid map/umaps as much as possible.
> > >
> > > For example, for most of the case DMA sync is just a nop. And such
> > > unmap() cause strange code in give_pages() as we discuss above?
> >
> > YES. You are right. For the first page, we just need to sync for cpu.
> > And we do not need to check the dma status.
> > But here (in page_to_skb), we need to call unmap, because this page is put
> > to the skb.
>
> Right, but issue still,
>
> The only case that we may hit
>
>         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
>
> is when the packet is smaller than GOOD_COPY_LEN.
>
> So if we sync_for_cpu for the head page, we don't do:
>
> 1) unmap in the receive_big()
> 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> recycled to the pool without unmapping?


I do not get it.

I think we can remove the check "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
from give_pages(). We just do the unmap when the page is leaving virtio-net.

>
> And I think we should do something similar for the mergeable case?

Do what?

We already use the sync API for the mergeable case.


>
> Btw, I found one the misleading comment introduced by f80bd740cb7c9
>
>         /* copy small packet so we can reuse these pages */
>         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
>
> We're not copying but building skb around the head page.

Will fix.

Thanks.


>
> Thanks
>
> >
> > Thanks.
> >
> >
> > >
> > > Thanks
> > >
> >
>
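
For reference, give_pages() as it would look after the change Xuan describes in #9, i.e. with every page handed back here still mapped (a sketch of the follow-up, not code from this posting):

/*
 * put the whole most recent used list in the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Callers keep the chain mapped; the unmap happens only when a page
	 * leaves virtio-net, so the remap fallback is gone. */

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; page_chain_next(end); end = page_chain_next(end));

	page_chain_add(end, rq->pages);
	rq->pages = page;
}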
Jason Wang April 19, 2024, 7:24 a.m. UTC | #10
On Fri, Apr 19, 2024 at 3:07 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > > >
> > > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > > store the DMA address from the pp structure inside the page.
> > > > > > >
> > > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > > we remap it before returning it to the chain.
> > > > > > >
> > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > ---
> > > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > > >
> > > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > > >
> > > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > > >  }
> > > > > > >
> > > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > > +{
> > > > > > > +       sg->dma_address = addr;
> > > > > > > +       sg->length = len;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > > +{
> > > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > > +
> > > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > > +{
> > > > > > > +       dma_addr_t addr;
> > > > > > > +
> > > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > > +               return -ENOMEM;
> > > > > > > +
> > > > > > > +       page_dma_addr(p) = addr;
> > > > > > > +       return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > > +{
> > > > > > > +       struct page *p, *n;
> > > > > > > +
> > > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > > +               n = page_chain_next(p);
> > > > > > > +
> > > > > > > +               page_chain_unmap(rq, p);
> > > > > > > +               __free_pages(p, 0);
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       rq->pages = NULL;
> > > > > > > +}
> > > > > > > +
> > > > > > >  /*
> > > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > > >   */
> > > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > > >  {
> > > > > > >         struct page *end;
> > > > > > >
> > > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > > >
> > > > > > This looks strange, the map should be done during allocation. Under
> > > > > > which condition could we hit this?
> > > > >
> > > > > This first page is umapped before we call page_to_skb().
> > > > > The page can be put back to the link in case of failure.
> > > >
> > > > See below.
> > > >
> > > > >
> > > > >
> > > > > >
> > > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > > +                       __free_pages(page, 0);
> > > > > > > +                       return;
> > > > > > > +               }
> > > > > > > +       }
> > > > > > > +
> > > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > > >
> > > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > > >                 rq->pages = page_chain_next(p);
> > > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > > >                 page_chain_add(p, NULL);
> > > > > > > -       } else
> > > > > > > +       } else {
> > > > > > >                 p = alloc_page(gfp_mask);
> > > > > > > +
> > > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > > +                       __free_pages(p, 0);
> > > > > > > +                       return NULL;
> > > > > > > +               }
> > > > > > > +       }
> > > > > > > +
> > > > > > >         return p;
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > >                         return NULL;
> > > > > > >
> > > > > > >                 page = page_chain_next(page);
> > > > > > > -               if (page)
> > > > > > > -                       give_pages(rq, page);
> > > > > > >                 goto ok;
> > > > > > >         }
> > > > > > >
> > > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > > >                 else
> > > > > > >                         page_to_free = page;
> > > > > > > +               page = NULL;
> > > > > > >                 goto ok;
> > > > > > >         }
> > > > > > >
> > > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > > >         while (len) {
> > > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > > +
> > > > > > > +               /* unmap the page before using it. */
> > > > > > > +               if (!offset)
> > > > > > > +                       page_chain_unmap(rq, page);
> > > > > > > +
> > > > > >
> > > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > > >
> > > > > I think we do not need that. Because the umap api does it.
> > > > > We do not work with DMA_SKIP_SYNC;
> > > >
> > > > Well, the problem is unmap is too heavyweight and it reduces the
> > > > effort of trying to avoid map/umaps as much as possible.
> > > >
> > > > For example, for most of the case DMA sync is just a nop. And such
> > > > unmap() cause strange code in give_pages() as we discuss above?
> > >
> > > YES. You are right. For the first page, we just need to sync for cpu.
> > > And we do not need to check the dma status.
> > > But here (in page_to_skb), we need to call unmap, because this page is put
> > > to the skb.
> >
> > Right, but issue still,
> >
> > The only case that we may hit
> >
> >         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
> >
> > is when the packet is smaller than GOOD_COPY_LEN.
> >
> > So if we sync_for_cpu for the head page, we don't do:
> >
> > 1) unmap in the receive_big()
> > 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> > recycled to the pool without unmapping?
>
>
> I do not get.

I meant something like e1000_copybreak().
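
Roughly like this (only a sketch: page_chain_sync_for_cpu() and
receive_small_copy() are made-up names, header offset and stats handling
are omitted, and it assumes the virtqueue_dma_sync_single_range_for_cpu()
wrapper from the premapped DMA API plus the page_dma_addr() macro from
this patch):

/* Hypothetical helper: make the received bytes visible to the CPU
 * without tearing the mapping down, so the page can go straight
 * back to rq->pages afterwards.
 */
static void page_chain_sync_for_cpu(struct receive_queue *rq,
				    struct page *p, unsigned int len)
{
	virtqueue_dma_sync_single_range_for_cpu(rq->vq, page_dma_addr(p),
						0, len, DMA_FROM_DEVICE);
}

/* Copybreak-style small-packet path, instead of page_chain_unmap(). */
static struct sk_buff *receive_small_copy(struct receive_queue *rq,
					  struct page *page,
					  unsigned int len)
{
	struct sk_buff *skb;

	page_chain_sync_for_cpu(rq, page, len);

	skb = napi_alloc_skb(&rq->napi, len);
	if (skb)
		skb_put_data(skb, page_address(page), len);

	/* The page is still mapped, so give_pages() does not need
	 * any remap fallback.
	 */
	give_pages(rq, page);

	return skb;
}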

>
> I think we can remove the code "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
> from give_pages(). We just do unmap when the page is leaving virtio-net.

That's the point.
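
I.e. give_pages() could then stay as simple as it was, because every page
handed back to the chain is still mapped. A rough sketch (the tail of the
function is assumed to be unchanged from the posted patch, it is cut off
in the quote above):

static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* No DMA_MAPPING_ERROR check / page_chain_map() fallback:
	 * callers only hand back pages that are still mapped.
	 */

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; page_chain_next(end); end = page_chain_next(end));

	page_chain_add(end, rq->pages);
	rq->pages = page;
}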

>
> >
> > And I think we should do something similar for the mergeable case?
>
> Do what?
>
> We have used the sync api for mergeable case.

Where?

I see virtnet_rq_get_buf which did sync but it is done after the page_to_skb().

>
>
> >
> > Btw, I found one the misleading comment introduced by f80bd740cb7c9
> >
> >         /* copy small packet so we can reuse these pages */
> >         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> >
> > We're not copying but building skb around the head page.
>
> Will fix.
>
> Thanks.

Thanks

>
>
> >
> > Thanks
> >
> > >
> > > Thanks.
> > >
> > >
> > > >
> > > > Thanks
> > > >
> > >
> >
>
Xuan Zhuo April 19, 2024, 7:26 a.m. UTC | #11
On Fri, 19 Apr 2024 15:24:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Apr 19, 2024 at 3:07 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > > > >
> > > > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > > > store the DMA address from the pp structure inside the page.
> > > > > > > >
> > > > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > > > we remap it before returning it to the chain.
> > > > > > > >
> > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > ---
> > > > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > > > >
> > > > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > > > >
> > > > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > > > +{
> > > > > > > > +       sg->dma_address = addr;
> > > > > > > > +       sg->length = len;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > > > +{
> > > > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > > > +
> > > > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > > > +{
> > > > > > > > +       dma_addr_t addr;
> > > > > > > > +
> > > > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > > > +               return -ENOMEM;
> > > > > > > > +
> > > > > > > > +       page_dma_addr(p) = addr;
> > > > > > > > +       return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > > > +{
> > > > > > > > +       struct page *p, *n;
> > > > > > > > +
> > > > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > > > +               n = page_chain_next(p);
> > > > > > > > +
> > > > > > > > +               page_chain_unmap(rq, p);
> > > > > > > > +               __free_pages(p, 0);
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > > +       rq->pages = NULL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /*
> > > > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > > > >   */
> > > > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > > > >  {
> > > > > > > >         struct page *end;
> > > > > > > >
> > > > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > > > >
> > > > > > > This looks strange, the map should be done during allocation. Under
> > > > > > > which condition could we hit this?
> > > > > >
> > > > > > This first page is umapped before we call page_to_skb().
> > > > > > The page can be put back to the link in case of failure.
> > > > >
> > > > > See below.
> > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > > > +                       __free_pages(page, 0);
> > > > > > > > +                       return;
> > > > > > > > +               }
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > > > >
> > > > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > > > >                 rq->pages = page_chain_next(p);
> > > > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > > > >                 page_chain_add(p, NULL);
> > > > > > > > -       } else
> > > > > > > > +       } else {
> > > > > > > >                 p = alloc_page(gfp_mask);
> > > > > > > > +
> > > > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > > > +                       __free_pages(p, 0);
> > > > > > > > +                       return NULL;
> > > > > > > > +               }
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > >         return p;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >                         return NULL;
> > > > > > > >
> > > > > > > >                 page = page_chain_next(page);
> > > > > > > > -               if (page)
> > > > > > > > -                       give_pages(rq, page);
> > > > > > > >                 goto ok;
> > > > > > > >         }
> > > > > > > >
> > > > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > > > >                 else
> > > > > > > >                         page_to_free = page;
> > > > > > > > +               page = NULL;
> > > > > > > >                 goto ok;
> > > > > > > >         }
> > > > > > > >
> > > > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > > > >         while (len) {
> > > > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > > > +
> > > > > > > > +               /* unmap the page before using it. */
> > > > > > > > +               if (!offset)
> > > > > > > > +                       page_chain_unmap(rq, page);
> > > > > > > > +
> > > > > > >
> > > > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > > > >
> > > > > > I think we do not need that. Because the umap api does it.
> > > > > > We do not work with DMA_SKIP_SYNC;
> > > > >
> > > > > Well, the problem is unmap is too heavyweight and it reduces the
> > > > > effort of trying to avoid map/umaps as much as possible.
> > > > >
> > > > > For example, for most of the case DMA sync is just a nop. And such
> > > > > unmap() cause strange code in give_pages() as we discuss above?
> > > >
> > > > YES. You are right. For the first page, we just need to sync for cpu.
> > > > And we do not need to check the dma status.
> > > > But here (in page_to_skb), we need to call unmap, because this page is put
> > > > to the skb.
> > >
> > > Right, but issue still,
> > >
> > > The only case that we may hit
> > >
> > >         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
> > >
> > > is when the packet is smaller than GOOD_COPY_LEN.
> > >
> > > So if we sync_for_cpu for the head page, we don't do:
> > >
> > > 1) unmap in the receive_big()
> > > 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> > > recycled to the pool without unmapping?
> >
> >
> > I do not get.
>
> I meant something like e1000_copybreak().
>
> >
> > I think we can remove the code "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
> > from give_pages(). We just do unmap when the page is leaving virtio-net.
>
> That's the point.
>
> >
> > >
> > > And I think we should do something similar for the mergeable case?
> >
> > Do what?
> >
> > We have used the sync api for mergeable case.
>
> Where?
>
> I see virtnet_rq_get_buf which did sync but it is done after the page_to_skb().

What does "done" mean?

Do you want to reuse the buffer?

Thanks.

>
> >
> >
> > >
> > > Btw, I found one the misleading comment introduced by f80bd740cb7c9
> > >
> > >         /* copy small packet so we can reuse these pages */
> > >         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> > >
> > > We're not copying but building skb around the head page.
> >
> > Will fix.
> >
> > Thanks.
>
> Thanks
>
> >
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > >
> > >
> >
>
Xuan Zhuo April 19, 2024, 7:52 a.m. UTC | #12
On Fri, 19 Apr 2024 15:24:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Apr 19, 2024 at 3:07 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > > > >
> > > > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > > > store the DMA address from the pp structure inside the page.
> > > > > > > >
> > > > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > > > we remap it before returning it to the chain.
> > > > > > > >
> > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > ---
> > > > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > > > >
> > > > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > > > >
> > > > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > > > +{
> > > > > > > > +       sg->dma_address = addr;
> > > > > > > > +       sg->length = len;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > > > +{
> > > > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > > > +
> > > > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > > > +{
> > > > > > > > +       dma_addr_t addr;
> > > > > > > > +
> > > > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > > > +               return -ENOMEM;
> > > > > > > > +
> > > > > > > > +       page_dma_addr(p) = addr;
> > > > > > > > +       return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > > > +{
> > > > > > > > +       struct page *p, *n;
> > > > > > > > +
> > > > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > > > +               n = page_chain_next(p);
> > > > > > > > +
> > > > > > > > +               page_chain_unmap(rq, p);
> > > > > > > > +               __free_pages(p, 0);
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > > +       rq->pages = NULL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /*
> > > > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > > > >   */
> > > > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > > > >  {
> > > > > > > >         struct page *end;
> > > > > > > >
> > > > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > > > >
> > > > > > > This looks strange, the map should be done during allocation. Under
> > > > > > > which condition could we hit this?
> > > > > >
> > > > > > This first page is umapped before we call page_to_skb().
> > > > > > The page can be put back to the link in case of failure.
> > > > >
> > > > > See below.
> > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > > > +                       __free_pages(page, 0);
> > > > > > > > +                       return;
> > > > > > > > +               }
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > > > >
> > > > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > > > >                 rq->pages = page_chain_next(p);
> > > > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > > > >                 page_chain_add(p, NULL);
> > > > > > > > -       } else
> > > > > > > > +       } else {
> > > > > > > >                 p = alloc_page(gfp_mask);
> > > > > > > > +
> > > > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > > > +                       __free_pages(p, 0);
> > > > > > > > +                       return NULL;
> > > > > > > > +               }
> > > > > > > > +       }
> > > > > > > > +
> > > > > > > >         return p;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >                         return NULL;
> > > > > > > >
> > > > > > > >                 page = page_chain_next(page);
> > > > > > > > -               if (page)
> > > > > > > > -                       give_pages(rq, page);
> > > > > > > >                 goto ok;
> > > > > > > >         }
> > > > > > > >
> > > > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > > > >                 else
> > > > > > > >                         page_to_free = page;
> > > > > > > > +               page = NULL;
> > > > > > > >                 goto ok;
> > > > > > > >         }
> > > > > > > >
> > > > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > > > >         while (len) {
> > > > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > > > +
> > > > > > > > +               /* unmap the page before using it. */
> > > > > > > > +               if (!offset)
> > > > > > > > +                       page_chain_unmap(rq, page);
> > > > > > > > +
> > > > > > >
> > > > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > > > >
> > > > > > I think we do not need that. Because the umap api does it.
> > > > > > We do not work with DMA_SKIP_SYNC;
> > > > >
> > > > > Well, the problem is unmap is too heavyweight and it reduces the
> > > > > effort of trying to avoid map/umaps as much as possible.
> > > > >
> > > > > For example, for most of the case DMA sync is just a nop. And such
> > > > > unmap() cause strange code in give_pages() as we discuss above?
> > > >
> > > > YES. You are right. For the first page, we just need to sync for cpu.
> > > > And we do not need to check the dma status.
> > > > But here (in page_to_skb), we need to call unmap, because this page is put
> > > > to the skb.
> > >
> > > Right, but issue still,
> > >
> > > The only case that we may hit
> > >
> > >         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
> > >
> > > is when the packet is smaller than GOOD_COPY_LEN.
> > >
> > > So if we sync_for_cpu for the head page, we don't do:
> > >
> > > 1) unmap in the receive_big()
> > > 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> > > recycled to the pool without unmapping?
> >
> >
> > I do not get.
>
> I meant something like e1000_copybreak().
>
> >
> > I think we can remove the code "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
> > from give_pages(). We just do unmap when the page is leaving virtio-net.
>
> That's the point.
>
> >
> > >
> > > And I think we should do something similar for the mergeable case?
> >
> > Do what?
> >
> > We have used the sync api for mergeable case.
>
> Where?
>
> I see virtnet_rq_get_buf which did sync but it is done after the page_to_skb().

Do you want to refill the buffer to the vq?

But the page_frag does not support recycling buffers.

Thanks.


>
> >
> >
> > >
> > > Btw, I found one the misleading comment introduced by f80bd740cb7c9
> > >
> > >         /* copy small packet so we can reuse these pages */
> > >         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> > >
> > > We're not copying but building skb around the head page.
> >
> > Will fix.
> >
> > Thanks.
>
> Thanks
>
> >
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks.
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > >
> > >
> >
>
Jason Wang April 19, 2024, 8:12 a.m. UTC | #13
On Fri, Apr 19, 2024 at 3:28 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Fri, 19 Apr 2024 15:24:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Fri, Apr 19, 2024 at 3:07 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > >
> > > > > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > >
> > > > > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > > > > >
> > > > > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > > > > store the DMA address from the pp structure inside the page.
> > > > > > > > >
> > > > > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > > > > we remap it before returning it to the chain.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > > > > >
> > > > > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > > > > >
> > > > > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > > > > +{
> > > > > > > > > +       sg->dma_address = addr;
> > > > > > > > > +       sg->length = len;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > > > > +{
> > > > > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > > > > +
> > > > > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > > > > +{
> > > > > > > > > +       dma_addr_t addr;
> > > > > > > > > +
> > > > > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > > > > +               return -ENOMEM;
> > > > > > > > > +
> > > > > > > > > +       page_dma_addr(p) = addr;
> > > > > > > > > +       return 0;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > > > > +{
> > > > > > > > > +       struct page *p, *n;
> > > > > > > > > +
> > > > > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > > > > +               n = page_chain_next(p);
> > > > > > > > > +
> > > > > > > > > +               page_chain_unmap(rq, p);
> > > > > > > > > +               __free_pages(p, 0);
> > > > > > > > > +       }
> > > > > > > > > +
> > > > > > > > > +       rq->pages = NULL;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  /*
> > > > > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > > > > >   */
> > > > > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > > > > >  {
> > > > > > > > >         struct page *end;
> > > > > > > > >
> > > > > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > > > > >
> > > > > > > > This looks strange, the map should be done during allocation. Under
> > > > > > > > which condition could we hit this?
> > > > > > >
> > > > > > > This first page is umapped before we call page_to_skb().
> > > > > > > The page can be put back to the link in case of failure.
> > > > > >
> > > > > > See below.
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > > > > +                       __free_pages(page, 0);
> > > > > > > > > +                       return;
> > > > > > > > > +               }
> > > > > > > > > +       }
> > > > > > > > > +
> > > > > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > > > > >
> > > > > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > > > > >                 rq->pages = page_chain_next(p);
> > > > > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > > > > >                 page_chain_add(p, NULL);
> > > > > > > > > -       } else
> > > > > > > > > +       } else {
> > > > > > > > >                 p = alloc_page(gfp_mask);
> > > > > > > > > +
> > > > > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > > > > +                       __free_pages(p, 0);
> > > > > > > > > +                       return NULL;
> > > > > > > > > +               }
> > > > > > > > > +       }
> > > > > > > > > +
> > > > > > > > >         return p;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > >                         return NULL;
> > > > > > > > >
> > > > > > > > >                 page = page_chain_next(page);
> > > > > > > > > -               if (page)
> > > > > > > > > -                       give_pages(rq, page);
> > > > > > > > >                 goto ok;
> > > > > > > > >         }
> > > > > > > > >
> > > > > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > > > > >                 else
> > > > > > > > >                         page_to_free = page;
> > > > > > > > > +               page = NULL;
> > > > > > > > >                 goto ok;
> > > > > > > > >         }
> > > > > > > > >
> > > > > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > > > > >         while (len) {
> > > > > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > > > > +
> > > > > > > > > +               /* unmap the page before using it. */
> > > > > > > > > +               if (!offset)
> > > > > > > > > +                       page_chain_unmap(rq, page);
> > > > > > > > > +
> > > > > > > >
> > > > > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > > > > >
> > > > > > > I think we do not need that. Because the umap api does it.
> > > > > > > We do not work with DMA_SKIP_SYNC;
> > > > > >
> > > > > > Well, the problem is unmap is too heavyweight and it reduces the
> > > > > > effort of trying to avoid map/umaps as much as possible.
> > > > > >
> > > > > > For example, for most of the case DMA sync is just a nop. And such
> > > > > > unmap() cause strange code in give_pages() as we discuss above?
> > > > >
> > > > > YES. You are right. For the first page, we just need to sync for cpu.
> > > > > And we do not need to check the dma status.
> > > > > But here (in page_to_skb), we need to call unmap, because this page is put
> > > > > to the skb.
> > > >
> > > > Right, but issue still,
> > > >
> > > > The only case that we may hit
> > > >
> > > >         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
> > > >
> > > > is when the packet is smaller than GOOD_COPY_LEN.
> > > >
> > > > So if we sync_for_cpu for the head page, we don't do:
> > > >
> > > > 1) unmap in the receive_big()
> > > > 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> > > > recycled to the pool without unmapping?
> > >
> > >
> > > I do not get.
> >
> > I meant something like e1000_copybreak().
> >
> > >
> > > I think we can remove the code "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
> > > from give_pages(). We just do unmap when the page is leaving virtio-net.
> >
> > That's the point.
> >
> > >
> > > >
> > > > And I think we should do something similar for the mergeable case?
> > >
> > > Do what?
> > >
> > > We have used the sync api for mergeable case.
> >
> > Where?
> >
> > I see virtnet_rq_get_buf which did sync but it is done after the page_to_skb().
>
> What does "done" mean?
>
> Do you want to reuse the buffer?

Nope, I think I misread the code. Mergeable buffers should be fine as
the unmap is done during virtnet_receive().

But the code might need some tweaks in the future.

In virtnet_receive() we have:

if (!vi->big_packets || vi->mergeable_rx_bufs) {
        virtnet_rq_get_buf
        receive_buf()
} else {
        virtqueue_get_buf()
}

but there's another switch in receive_buf():

if (vi->mergeable_rx_bufs)
else if (vi->big_packets)
else
...

Which is kind of a mess somehow.
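
Something like a single loop with one dispatch point could tidy that up
later. A rough sketch only, not part of this series
(virtnet_receive_one_loop() is a made-up name and the ctx plumbing is
simplified):

/* Sketch: fold the two receive loops into one, so the per-mode
 * decision is only about how the buffer is fetched.
 */
static int virtnet_receive_one_loop(struct virtnet_info *vi,
				    struct receive_queue *rq, int budget,
				    unsigned int *xdp_xmit,
				    struct virtnet_rq_stats *stats)
{
	int packets = 0;
	unsigned int len;
	void *buf;

	while (packets < budget) {
		void *ctx = NULL;

		if (!vi->big_packets || vi->mergeable_rx_bufs)
			buf = virtnet_rq_get_buf(rq, &len, &ctx);
		else
			buf = virtqueue_get_buf(rq->vq, &len);

		if (!buf)
			break;

		receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
		packets++;
	}

	return packets;
}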

Thanks
>
> Thanks.
>
> >
> > >
> > >
> > > >
> > > > Btw, I found one the misleading comment introduced by f80bd740cb7c9
> > > >
> > > >         /* copy small packet so we can reuse these pages */
> > > >         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> > > >
> > > > We're not copying but building skb around the head page.
> > >
> > > Will fix.
> > >
> > > Thanks.
> >
> > Thanks
> >
> > >
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks.
> > > > >
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > >
> > > >
> > >
> >
>
Xuan Zhuo April 19, 2024, 8:14 a.m. UTC | #14
On Fri, 19 Apr 2024 16:12:15 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Apr 19, 2024 at 3:28 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 19 Apr 2024 15:24:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Fri, Apr 19, 2024 at 3:07 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Fri, 19 Apr 2024 13:46:25 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > On Fri, Apr 19, 2024 at 12:23 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Fri, 19 Apr 2024 08:43:43 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > On Thu, Apr 18, 2024 at 4:35 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, 18 Apr 2024 14:25:06 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > On Thu, Apr 11, 2024 at 10:51 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > > > > > >
> > > > > > > > > > In big mode, pre-mapping DMA is beneficial because if the pages are not
> > > > > > > > > > used, we can reuse them without needing to unmap and remap.
> > > > > > > > > >
> > > > > > > > > > We require space to store the DMA address. I use the page.dma_addr to
> > > > > > > > > > store the DMA address from the pp structure inside the page.
> > > > > > > > > >
> > > > > > > > > > Every page retrieved from get_a_page() is mapped, and its DMA address is
> > > > > > > > > > stored in page.dma_addr. When a page is returned to the chain, we check
> > > > > > > > > > the DMA status; if it is not mapped (potentially having been unmapped),
> > > > > > > > > > we remap it before returning it to the chain.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++-------
> > > > > > > > > >  1 file changed, 81 insertions(+), 17 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > > > > > index 4446fb54de6d..7ea7e9bcd5d7 100644
> > > > > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > > > > @@ -50,6 +50,7 @@ module_param(napi_tx, bool, 0644);
> > > > > > > > > >
> > > > > > > > > >  #define page_chain_next(p)     ((struct page *)((p)->pp))
> > > > > > > > > >  #define page_chain_add(p, n)   ((p)->pp = (void *)n)
> > > > > > > > > > +#define page_dma_addr(p)       ((p)->dma_addr)
> > > > > > > > > >
> > > > > > > > > >  /* RX packet size EWMA. The average packet size is used to determine the packet
> > > > > > > > > >   * buffer size when refilling RX rings. As the entire RX ring may be refilled
> > > > > > > > > > @@ -434,6 +435,46 @@ skb_vnet_common_hdr(struct sk_buff *skb)
> > > > > > > > > >         return (struct virtio_net_common_hdr *)skb->cb;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > > > > > > > > +{
> > > > > > > > > > +       sg->dma_address = addr;
> > > > > > > > > > +       sg->length = len;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static void page_chain_unmap(struct receive_queue *rq, struct page *p)
> > > > > > > > > > +{
> > > > > > > > > > +       virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
> > > > > > > > > > +                                      DMA_FROM_DEVICE, 0);
> > > > > > > > > > +
> > > > > > > > > > +       page_dma_addr(p) = DMA_MAPPING_ERROR;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static int page_chain_map(struct receive_queue *rq, struct page *p)
> > > > > > > > > > +{
> > > > > > > > > > +       dma_addr_t addr;
> > > > > > > > > > +
> > > > > > > > > > +       addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
> > > > > > > > > > +       if (virtqueue_dma_mapping_error(rq->vq, addr))
> > > > > > > > > > +               return -ENOMEM;
> > > > > > > > > > +
> > > > > > > > > > +       page_dma_addr(p) = addr;
> > > > > > > > > > +       return 0;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static void page_chain_release(struct receive_queue *rq)
> > > > > > > > > > +{
> > > > > > > > > > +       struct page *p, *n;
> > > > > > > > > > +
> > > > > > > > > > +       for (p = rq->pages; p; p = n) {
> > > > > > > > > > +               n = page_chain_next(p);
> > > > > > > > > > +
> > > > > > > > > > +               page_chain_unmap(rq, p);
> > > > > > > > > > +               __free_pages(p, 0);
> > > > > > > > > > +       }
> > > > > > > > > > +
> > > > > > > > > > +       rq->pages = NULL;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  /*
> > > > > > > > > >   * put the whole most recent used list in the beginning for reuse
> > > > > > > > > >   */
> > > > > > > > > > @@ -441,6 +482,13 @@ static void give_pages(struct receive_queue *rq, struct page *page)
> > > > > > > > > >  {
> > > > > > > > > >         struct page *end;
> > > > > > > > > >
> > > > > > > > > > +       if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
> > > > > > > > >
> > > > > > > > > This looks strange, the map should be done during allocation. Under
> > > > > > > > > which condition could we hit this?
> > > > > > > >
> > > > > > > > This first page is umapped before we call page_to_skb().
> > > > > > > > The page can be put back to the link in case of failure.
> > > > > > >
> > > > > > > See below.
> > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +               if (page_chain_map(rq, page)) {
> > > > > > > > > > +                       __free_pages(page, 0);
> > > > > > > > > > +                       return;
> > > > > > > > > > +               }
> > > > > > > > > > +       }
> > > > > > > > > > +
> > > > > > > > > >         /* Find end of list, sew whole thing into vi->rq.pages. */
> > > > > > > > > >         for (end = page; page_chain_next(end); end = page_chain_next(end));
> > > > > > > > > >
> > > > > > > > > > @@ -456,8 +504,15 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
> > > > > > > > > >                 rq->pages = page_chain_next(p);
> > > > > > > > > >                 /* clear chain here, it is used to chain pages */
> > > > > > > > > >                 page_chain_add(p, NULL);
> > > > > > > > > > -       } else
> > > > > > > > > > +       } else {
> > > > > > > > > >                 p = alloc_page(gfp_mask);
> > > > > > > > > > +
> > > > > > > > > > +               if (page_chain_map(rq, p)) {
> > > > > > > > > > +                       __free_pages(p, 0);
> > > > > > > > > > +                       return NULL;
> > > > > > > > > > +               }
> > > > > > > > > > +       }
> > > > > > > > > > +
> > > > > > > > > >         return p;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -613,8 +668,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > > >                         return NULL;
> > > > > > > > > >
> > > > > > > > > >                 page = page_chain_next(page);
> > > > > > > > > > -               if (page)
> > > > > > > > > > -                       give_pages(rq, page);
> > > > > > > > > >                 goto ok;
> > > > > > > > > >         }
> > > > > > > > > >
> > > > > > > > > > @@ -640,6 +693,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > > >                         skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> > > > > > > > > >                 else
> > > > > > > > > >                         page_to_free = page;
> > > > > > > > > > +               page = NULL;
> > > > > > > > > >                 goto ok;
> > > > > > > > > >         }
> > > > > > > > > >
> > > > > > > > > > @@ -657,6 +711,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> > > > > > > > > >         BUG_ON(offset >= PAGE_SIZE);
> > > > > > > > > >         while (len) {
> > > > > > > > > >                 unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
> > > > > > > > > > +
> > > > > > > > > > +               /* unmap the page before using it. */
> > > > > > > > > > +               if (!offset)
> > > > > > > > > > +                       page_chain_unmap(rq, page);
> > > > > > > > > > +
> > > > > > > > >
> > > > > > > > > This sounds strange, do we need a virtqueue_sync_for_cpu() helper here?
> > > > > > > >
> > > > > > > > I think we do not need that. Because the umap api does it.
> > > > > > > > We do not work with DMA_SKIP_SYNC;
> > > > > > >
> > > > > > > Well, the problem is unmap is too heavyweight and it reduces the
> > > > > > > effort of trying to avoid map/umaps as much as possible.
> > > > > > >
> > > > > > > For example, for most of the case DMA sync is just a nop. And such
> > > > > > > unmap() cause strange code in give_pages() as we discuss above?
> > > > > >
> > > > > > YES. You are right. For the first page, we just need to sync for cpu.
> > > > > > And we do not need to check the dma status.
> > > > > > But here (in page_to_skb), we need to call unmap, because this page is put
> > > > > > to the skb.
> > > > >
> > > > > Right, but issue still,
> > > > >
> > > > > The only case that we may hit
> > > > >
> > > > >         if (page_dma_addr(page) == DMA_MAPPING_ERROR)
> > > > >
> > > > > is when the packet is smaller than GOOD_COPY_LEN.
> > > > >
> > > > > So if we sync_for_cpu for the head page, we don't do:
> > > > >
> > > > > 1) unmap in the receive_big()
> > > > > 2) do snyc_for_cpu() just before skb_put_data(), so the page could be
> > > > > recycled to the pool without unmapping?
> > > >
> > > >
> > > > I do not get.
> > >
> > > I meant something like e1000_copybreak().
> > >
> > > >
> > > > I think we can remove the code "if (page_dma_addr(page) == DMA_MAPPING_ERROR)"
> > > > from give_pages(). We just do unmap when the page is leaving virtio-net.
> > >
> > > That's the point.
> > >
> > > >
> > > > >
> > > > > And I think we should do something similar for the mergeable case?
> > > >
> > > > Do what?
> > > >
> > > > We have used the sync api for mergeable case.
> > >
> > > Where?
> > >
> > > I see virtnet_rq_get_buf which did sync but it is done after the page_to_skb().
> >
> > What does "done" mean?
> >
> > Do you want to reuse the buffer?
>
> Nope, I think I misread the code. Mergeable buffers should be fine as
> the unmap is done during virtnet_receive().
>
> But the code might need some tweaks in the future.
>
> In virtnet_receive() we have:
>
> if (!vi->big_packets || vi->mergeable_rx_bufs) {
>         virtnet_rq_get_buf
>         receive_buf()
> } else {
>         virtqueue_get_buf()
> }
>
> but there's another switch in receive_buf():
>
> if (vi->mergeable_rx_bufs)
> else if (vi->big_packets)
> else
> ...
>
> Which is kind of a mess somehow.

YES. I will change this in the AF_XDP patch set.

Thanks.


>
> Thanks
> >
> > Thanks.
> >
> > >
> > > >
> > > >
> > > > >
> > > > > Btw, I found one the misleading comment introduced by f80bd740cb7c9
> > > > >
> > > > >         /* copy small packet so we can reuse these pages */
> > > > >         if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> > > > >
> > > > > We're not copying but building skb around the head page.
> > > >
> > > > Will fix.
> > > >
> > > > Thanks.
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4446fb54de6d..7ea7e9bcd5d7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -50,6 +50,7 @@  module_param(napi_tx, bool, 0644);
 
 #define page_chain_next(p)	((struct page *)((p)->pp))
 #define page_chain_add(p, n)	((p)->pp = (void *)n)
+#define page_dma_addr(p)	((p)->dma_addr)
 
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
@@ -434,6 +435,46 @@  skb_vnet_common_hdr(struct sk_buff *skb)
 	return (struct virtio_net_common_hdr *)skb->cb;
 }
 
+static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
+{
+	sg->dma_address = addr;
+	sg->length = len;
+}
+
+static void page_chain_unmap(struct receive_queue *rq, struct page *p)
+{
+	virtqueue_dma_unmap_page_attrs(rq->vq, page_dma_addr(p), PAGE_SIZE,
+				       DMA_FROM_DEVICE, 0);
+
+	page_dma_addr(p) = DMA_MAPPING_ERROR;
+}
+
+static int page_chain_map(struct receive_queue *rq, struct page *p)
+{
+	dma_addr_t addr;
+
+	addr = virtqueue_dma_map_page_attrs(rq->vq, p, 0, PAGE_SIZE, DMA_FROM_DEVICE, 0);
+	if (virtqueue_dma_mapping_error(rq->vq, addr))
+		return -ENOMEM;
+
+	page_dma_addr(p) = addr;
+	return 0;
+}
+
+static void page_chain_release(struct receive_queue *rq)
+{
+	struct page *p, *n;
+
+	for (p = rq->pages; p; p = n) {
+		n = page_chain_next(p);
+
+		page_chain_unmap(rq, p);
+		__free_pages(p, 0);
+	}
+
+	rq->pages = NULL;
+}
+
 /*
  * put the whole most recent used list in the beginning for reuse
  */
@@ -441,6 +482,13 @@  static void give_pages(struct receive_queue *rq, struct page *page)
 {
 	struct page *end;
 
+	if (page_dma_addr(page) == DMA_MAPPING_ERROR) {
+		if (page_chain_map(rq, page)) {
+			__free_pages(page, 0);
+			return;
+		}
+	}
+
 	/* Find end of list, sew whole thing into vi->rq.pages. */
 	for (end = page; page_chain_next(end); end = page_chain_next(end));
 
@@ -456,8 +504,15 @@  static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 		rq->pages = page_chain_next(p);
 		/* clear chain here, it is used to chain pages */
 		page_chain_add(p, NULL);
-	} else
+	} else {
 		p = alloc_page(gfp_mask);
+
+		if (page_chain_map(rq, p)) {
+			__free_pages(p, 0);
+			return NULL;
+		}
+	}
+
 	return p;
 }
 
@@ -613,8 +668,6 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 			return NULL;
 
 		page = page_chain_next(page);
-		if (page)
-			give_pages(rq, page);
 		goto ok;
 	}
 
@@ -640,6 +693,7 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
 		else
 			page_to_free = page;
+		page = NULL;
 		goto ok;
 	}
 
@@ -657,6 +711,11 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	BUG_ON(offset >= PAGE_SIZE);
 	while (len) {
 		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+
+		/* unmap the page before using it. */
+		if (!offset)
+			page_chain_unmap(rq, page);
+
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
 				frag_size, truesize);
 		len -= frag_size;
@@ -664,15 +723,15 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		offset = 0;
 	}
 
-	if (page)
-		give_pages(rq, page);
-
 ok:
 	hdr = skb_vnet_common_hdr(skb);
 	memcpy(hdr, hdr_p, hdr_len);
 	if (page_to_free)
 		put_page(page_to_free);
 
+	if (page)
+		give_pages(rq, page);
+
 	return skb;
 }
 
@@ -823,7 +882,8 @@  static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
 
 	rq = &vi->rq[i];
 
-	if (rq->do_dma)
+	/* Skip the unmap for big mode. */
+	if (!vi->big_packets || vi->mergeable_rx_bufs)
 		virtnet_rq_unmap(rq, buf, 0);
 
 	virtnet_rq_free_buf(vi, rq, buf);
@@ -1346,8 +1406,12 @@  static struct sk_buff *receive_big(struct net_device *dev,
 				   struct virtnet_rq_stats *stats)
 {
 	struct page *page = buf;
-	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
+	struct sk_buff *skb;
+
+	/* Unmap first page. The follow code may read this page. */
+	page_chain_unmap(rq, page);
+
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
 
 	u64_stats_add(&stats->bytes, len - vi->hdr_len);
 	if (unlikely(!skb))
@@ -1896,7 +1960,7 @@  static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 			   gfp_t gfp)
 {
 	struct page *first, *list = NULL;
-	char *p;
+	dma_addr_t p;
 	int i, err, offset;
 
 	sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2);
@@ -1909,7 +1973,7 @@  static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 				give_pages(rq, list);
 			return -ENOMEM;
 		}
-		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
+		sg_fill_dma(&rq->sg[i], page_dma_addr(first), PAGE_SIZE);
 
 		/* chain new page in list head to match sg */
 		page_chain_add(first, list);
@@ -1921,15 +1985,16 @@  static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 		give_pages(rq, list);
 		return -ENOMEM;
 	}
-	p = page_address(first);
+
+	p = page_dma_addr(first);
 
 	/* rq->sg[0], rq->sg[1] share the same page */
 	/* a separated rq->sg[0] for header - required in case !any_header_sg */
-	sg_set_buf(&rq->sg[0], p, vi->hdr_len);
+	sg_fill_dma(&rq->sg[0], p, vi->hdr_len);
 
 	/* rq->sg[1] for data packet, from offset */
 	offset = sizeof(struct padded_vnet_hdr);
-	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
+	sg_fill_dma(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 
 	/* chain first in list head */
 	page_chain_add(first, list);
@@ -2131,7 +2196,7 @@  static int virtnet_receive(struct receive_queue *rq, int budget,
 		}
 	} else {
 		while (packets < budget &&
-		       (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
+		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
 			packets++;
 		}
@@ -4252,8 +4317,7 @@  static void _free_receive_bufs(struct virtnet_info *vi)
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		while (vi->rq[i].pages)
-			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+		page_chain_release(&vi->rq[i]);
 
 		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
 		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);