
[09/33] xsk: xsk_buff_pool add callback for dma_sync

Message ID 20230202110058.130695-10-xuanzhuo@linux.alibaba.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series virtio-net: support AF_XDP zero copy

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 59 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo Feb. 2, 2023, 11 a.m. UTC
Use callbacks to implement the DMA sync operations, which simplifies
adding virtio DMA sync support later.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 include/net/xsk_buff_pool.h |  6 ++++++
 net/xdp/xsk_buff_pool.c     | 24 ++++++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)
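
The point of the indirection is that xp_dma_map() installs the generic DMA API
syncs by default, while a later patch can install driver-specific routines
(e.g. for virtio) in their place. A minimal sketch of that intended use
follows; the virtnet_* names are hypothetical and not part of this series:

/* Hypothetical driver-side override, for illustration only. */
static void virtnet_dma_sync_for_cpu(struct device *dev, dma_addr_t addr,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir)
{
	/* A virtio-specific sync would go here instead of
	 * dma_sync_single_range_for_cpu().
	 */
}

static void virtnet_dma_sync_for_device(struct device *dev, dma_addr_t addr,
					unsigned long offset, size_t size,
					enum dma_data_direction dir)
{
	/* Likewise for the device direction. */
}

static void virtnet_setup_xsk_sync(struct xsk_buff_pool *pool)
{
	/* Replace the defaults installed by xp_dma_map(). */
	pool->dma_sync_for_cpu = virtnet_dma_sync_for_cpu;
	pool->dma_sync_for_device = virtnet_dma_sync_for_device;
}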

Comments

Magnus Karlsson Feb. 2, 2023, 12:51 p.m. UTC | #1
On Thu, 2 Feb 2023 at 12:05, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Use callbacks to implement the DMA sync operations, which simplifies
> adding virtio DMA sync support later.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  include/net/xsk_buff_pool.h |  6 ++++++
>  net/xdp/xsk_buff_pool.c     | 24 ++++++++++++++++++++----
>  2 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> index 3e952e569418..53b681120354 100644
> --- a/include/net/xsk_buff_pool.h
> +++ b/include/net/xsk_buff_pool.h
> @@ -75,6 +75,12 @@ struct xsk_buff_pool {
>         u32 chunk_size;
>         u32 chunk_shift;
>         u32 frame_len;
> +       void (*dma_sync_for_cpu)(struct device *dev, dma_addr_t addr,
> +                                unsigned long offset, size_t size,
> +                                enum dma_data_direction dir);
> +       void (*dma_sync_for_device)(struct device *dev, dma_addr_t addr,
> +                                   unsigned long offset, size_t size,
> +                                   enum dma_data_direction dir);

If we put these two pointers here, the number of cache lines required
in the data path for this struct will be increased from 2 to 3, which
will likely affect performance negatively. These sync operations are
also not used on most systems, so how about we put them in the first
section of this struct labeled "Members only used in the control path
first." instead? There is a 26-byte hole at the end of it that can be
used.
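
As a rough illustration of that suggestion (a sketch only; the surrounding
fields are abbreviated and do not reproduce the real header), the callbacks
would sit with the control-path-only members rather than next to frame_len:

struct xsk_buff_pool {
	/* Members only used in the control path first. */
	struct device *dev;
	/* ... other control-path-only members, ending in the hole ... */
	void (*dma_sync_for_cpu)(struct device *dev, dma_addr_t addr,
				 unsigned long offset, size_t size,
				 enum dma_data_direction dir);
	void (*dma_sync_for_device)(struct device *dev, dma_addr_t addr,
				    unsigned long offset, size_t size,
				    enum dma_data_direction dir);

	/* Data path members stay within their current cache lines. */
	u32 chunk_size;
	u32 frame_len;
	/* ... */
};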

>         u8 cached_need_wakeup;
>         bool uses_need_wakeup;
>         bool dma_need_sync;
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index ed6c71826d31..78e325e195fa 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -403,6 +403,20 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
>         return 0;
>  }
>
> +static void dma_sync_for_cpu(struct device *dev, dma_addr_t addr,
> +                            unsigned long offset, size_t size,
> +                            enum dma_data_direction dir)
> +{
> +       dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
> +}
> +
> +static void dma_sync_for_device(struct device *dev, dma_addr_t addr,
> +                               unsigned long offset, size_t size,
> +                               enum dma_data_direction dir)
> +{
> +       dma_sync_single_range_for_device(dev, addr, offset, size, dir);
> +}
> +
>  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
>                unsigned long attrs, struct page **pages, u32 nr_pages)
>  {
> @@ -421,6 +435,9 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
>                 return 0;
>         }
>
> +       pool->dma_sync_for_cpu = dma_sync_for_cpu;
> +       pool->dma_sync_for_device = dma_sync_for_device;
> +
>         dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
>         if (!dma_map)
>                 return -ENOMEM;
> @@ -667,15 +684,14 @@ EXPORT_SYMBOL(xp_raw_get_dma);
>
>  void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
>  {
> -       dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
> -                                     xskb->pool->frame_len, DMA_BIDIRECTIONAL);
> +       xskb->pool->dma_sync_for_cpu(xskb->pool->dev, xskb->dma, 0,
> +                                    xskb->pool->frame_len, DMA_BIDIRECTIONAL);
>  }
>  EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
>
>  void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
>                                  size_t size)
>  {
> -       dma_sync_single_range_for_device(pool->dev, dma, 0,
> -                                        size, DMA_BIDIRECTIONAL);
> +       pool->dma_sync_for_device(pool->dev, dma, 0, size, DMA_BIDIRECTIONAL);
>  }
>  EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
> --
> 2.32.0.3.g01195cf9f
>
Xuan Zhuo Feb. 3, 2023, 7:01 a.m. UTC | #2
On Thu, 2 Feb 2023 13:51:20 +0100, Magnus Karlsson <magnus.karlsson@gmail.com> wrote:
> On Thu, 2 Feb 2023 at 12:05, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > Use callbacks to implement the DMA sync operations, which simplifies
> > adding virtio DMA sync support later.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  include/net/xsk_buff_pool.h |  6 ++++++
> >  net/xdp/xsk_buff_pool.c     | 24 ++++++++++++++++++++----
> >  2 files changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> > index 3e952e569418..53b681120354 100644
> > --- a/include/net/xsk_buff_pool.h
> > +++ b/include/net/xsk_buff_pool.h
> > @@ -75,6 +75,12 @@ struct xsk_buff_pool {
> >         u32 chunk_size;
> >         u32 chunk_shift;
> >         u32 frame_len;
> > +       void (*dma_sync_for_cpu)(struct device *dev, dma_addr_t addr,
> > +                                unsigned long offset, size_t size,
> > +                                enum dma_data_direction dir);
> > +       void (*dma_sync_for_device)(struct device *dev, dma_addr_t addr,
> > +                                   unsigned long offset, size_t size,
> > +                                   enum dma_data_direction dir);
>
> If we put these two pointers here, the number of cache lines required
> in the data path for this struct will be increased from 2 to 3, which
> will likely affect performance negatively. These sync operations are
> also not used on most systems, so how about we put them in the first
> section of this struct labeled "Members only used in the control path
> first." instead? There is a 26-byte hole at the end of it that can be
> used.


Will fix.

Thanks.



Patch

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..53b681120354 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -75,6 +75,12 @@  struct xsk_buff_pool {
 	u32 chunk_size;
 	u32 chunk_shift;
 	u32 frame_len;
+	void (*dma_sync_for_cpu)(struct device *dev, dma_addr_t addr,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction dir);
+	void (*dma_sync_for_device)(struct device *dev, dma_addr_t addr,
+				    unsigned long offset, size_t size,
+				    enum dma_data_direction dir);
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
 	bool dma_need_sync;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ed6c71826d31..78e325e195fa 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -403,6 +403,20 @@  static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
 	return 0;
 }
 
+static void dma_sync_for_cpu(struct device *dev, dma_addr_t addr,
+			     unsigned long offset, size_t size,
+			     enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
+}
+
+static void dma_sync_for_device(struct device *dev, dma_addr_t addr,
+				unsigned long offset, size_t size,
+				enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_device(dev, addr, offset, size, dir);
+}
+
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 	       unsigned long attrs, struct page **pages, u32 nr_pages)
 {
@@ -421,6 +435,9 @@  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 		return 0;
 	}
 
+	pool->dma_sync_for_cpu = dma_sync_for_cpu;
+	pool->dma_sync_for_device = dma_sync_for_device;
+
 	dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
 	if (!dma_map)
 		return -ENOMEM;
@@ -667,15 +684,14 @@  EXPORT_SYMBOL(xp_raw_get_dma);
 
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
 {
-	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
-				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+	xskb->pool->dma_sync_for_cpu(xskb->pool->dev, xskb->dma, 0,
+				     xskb->pool->frame_len, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
 
 void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
 				 size_t size)
 {
-	dma_sync_single_range_for_device(pool->dev, dma, 0,
-					 size, DMA_BIDIRECTIONAL);
+	pool->dma_sync_for_device(pool->dev, dma, 0, size, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_device_slow);