
[net-next,v1] xsk: introduce xsk_dma_cbs

Message ID 20230423062546.96880-1-xuanzhuo@linux.alibaba.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 12 this patch: 12
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang fail Errors and warnings before: 13 this patch: 13
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 14 this patch: 14
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo April 23, 2023, 6:25 a.m. UTC
The purpose of this patch is to allow a driver to pass its own DMA
callbacks to xsk.

This is to handle the virtio-net case. If virtio does not have
VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use the DMA API. In that
case, xsk cannot use the DMA API directly to obtain DMA addresses, so
xsk must let the driver supply its own DMA callbacks.

More is here:
    https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
    https://lore.kernel.org/all/20230421065059.1bc78133@kernel.org
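
To make the intended usage concrete, here is a minimal driver-side sketch
(hypothetical names, not part of this patch; the callback bodies are
illustrative only):

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <net/xdp_sock_drv.h>

static dma_addr_t drv_xsk_map_page(struct device *dev, struct page *page,
				   size_t offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	/* A driver that cannot go through the DMA API hands back a
	 * guest-physical address instead of calling dma_map_page_attrs().
	 */
	return (dma_addr_t)virt_to_phys(page_address(page) + offset);
}

static void drv_xsk_unmap_page(struct device *dev, dma_addr_t addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	/* nothing to undo for a physical-address "mapping" */
}

static int drv_xsk_mapping_error(struct device *dev, dma_addr_t addr)
{
	return 0;
}

static bool drv_xsk_need_sync(struct device *dev, dma_addr_t addr)
{
	return false;
}

static struct xsk_dma_cbs drv_xsk_dma_cbs = {
	.map_page	= drv_xsk_map_page,
	.unmap_page	= drv_xsk_unmap_page,
	.mapping_error	= drv_xsk_mapping_error,
	.need_sync	= drv_xsk_need_sync,
	/* .sync_single_range_for_{cpu,device} are only called when
	 * .need_sync returns true, so they are omitted in this sketch.
	 */
};

/* Registration in the driver's XSK setup path: */
static int drv_xsk_pool_enable(struct xsk_buff_pool *pool, struct device *dev)
{
	return xsk_pool_dma_map_with_cbs(pool, dev, &drv_xsk_dma_cbs, 0);
}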

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 include/net/xdp_sock_drv.h  | 20 ++++++++++-
 include/net/xsk_buff_pool.h | 19 ++++++++++
 net/xdp/xsk_buff_pool.c     | 71 +++++++++++++++++++++++++++----------
 3 files changed, 90 insertions(+), 20 deletions(-)

Comments

gregkh@linuxfoundation.org April 23, 2023, 6:45 a.m. UTC | #1
On Sun, Apr 23, 2023 at 02:25:45PM +0800, Xuan Zhuo wrote:
> The purpose of this patch is to allow driver pass the own dma callbacks
> to xsk.
> 
> This is to cope with the scene of virtio-net. If virtio does not have
> VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use DMA API. In this case,
> xsk cannot use DMA API directly to achieve DMA address. Based on this
> scene, we must let xsk support driver to use the driver's dma callbacks.

Why does virtio need to use dma?  That seems to go against the overall
goal of virtio's new security restrictions that are being proposed
(where they do NOT want it to use dma as it is not secure).

And why is virtio special here?  If you have access to a device, it
should have all of the needed dma hooks already set up based on the
bus it is on.  Or is the issue you don't have a real bus set up?  If so,
why not fix that?

> More is here:
>     https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
>     https://lore.kernel.org/all/20230421065059.1bc78133@kernel.org

Am I missing the user of this new api?  Don't you need to submit that at
the same time so we can at least see if this new api works properly?

> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  include/net/xdp_sock_drv.h  | 20 ++++++++++-
>  include/net/xsk_buff_pool.h | 19 ++++++++++
>  net/xdp/xsk_buff_pool.c     | 71 +++++++++++++++++++++++++++----------
>  3 files changed, 90 insertions(+), 20 deletions(-)
> 
> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> index 9c0d860609ba..8b5284b272e4 100644
> --- a/include/net/xdp_sock_drv.h
> +++ b/include/net/xdp_sock_drv.h
> @@ -67,7 +67,17 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
>  {
>  	struct xdp_umem *umem = pool->umem;
>  
> -	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
> +	return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
> +}
> +
> +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> +					    struct device *dev,
> +					    struct xsk_dma_cbs *dma_cbs,
> +					    unsigned long attrs)
> +{
> +	struct xdp_umem *umem = pool->umem;
> +
> +	return xp_dma_map(pool, dev, dma_cbs, attrs, umem->pgs, umem->npgs);
>  }
>  
>  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> @@ -226,6 +236,14 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
>  	return 0;
>  }
>  
> +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> +					    struct device *dev,
> +					    struct xsk_dma_cbs *dma_cbs,
> +					    unsigned long attrs)
> +{
> +	return 0;
> +}
> +
>  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
>  {
>  	return 0;
> diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> index 3e952e569418..2de88be9188b 100644
> --- a/include/net/xsk_buff_pool.h
> +++ b/include/net/xsk_buff_pool.h
> @@ -43,6 +43,23 @@ struct xsk_dma_map {
>  	bool dma_need_sync;
>  };
>  
> +struct xsk_dma_cbs {
> +	dma_addr_t (*map_page)(struct device *dev, struct page *page,

Why are you working on a "raw" struct device here and everywhere else?
Are you sure that is ok?  What is it needed for?

> +			       size_t offset, size_t size,
> +			       enum dma_data_direction dir, unsigned long attrs);
> +	void (*unmap_page)(struct device *dev, dma_addr_t addr,
> +			   size_t size, enum dma_data_direction dir,
> +			   unsigned long attrs);
> +	int (*mapping_error)(struct device *dev, dma_addr_t addr);
> +	bool (*need_sync)(struct device *dev, dma_addr_t addr);
> +	void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
> +					  size_t offset, size_t size,
> +					  enum dma_data_direction dir);
> +	void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
> +					     size_t offset, size_t size,
> +					     enum dma_data_direction dir);
> +};

No documentation for any of these callbacks?  Please use kerneldoc so we
at least have a clue as to what they should be doing.

> +
>  struct xsk_buff_pool {
>  	/* Members only used in the control path first. */
>  	struct device *dev;
> @@ -85,6 +102,7 @@ struct xsk_buff_pool {
>  	 * sockets share a single cq when the same netdev and queue id is shared.
>  	 */
>  	spinlock_t cq_lock;
> +	struct xsk_dma_cbs *dma_cbs;
>  	struct xdp_buff_xsk *free_heads[];
>  };
>  
> @@ -131,6 +149,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
>  /* AF_XDP ZC drivers, via xdp_sock_buff.h */
>  void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
>  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
> +	       struct xsk_dma_cbs *dma_cbs,
>  	       unsigned long attrs, struct page **pages, u32 nr_pages);
>  void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
>  struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index b2df1e0f8153..e7e6c91f6e51 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -328,7 +328,8 @@ static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
>  	kfree(dma_map);
>  }
>  
> -static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> +static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
> +			   struct xsk_dma_cbs *dma_cbs, unsigned long attrs)
>  {
>  	dma_addr_t *dma;
>  	u32 i;
> @@ -337,8 +338,12 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
>  		dma = &dma_map->dma_pages[i];
>  		if (*dma) {
>  			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
> -			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
> -					     DMA_BIDIRECTIONAL, attrs);
> +			if (unlikely(dma_cbs))

If you can not measure the use of likely/unlikely in a benchmark, then
you should never use it as the compiler and CPU will work better without
it (and will work better over time as hardware and compiler change).

thanks,

greg k-h
Xuan Zhuo April 23, 2023, 6:58 a.m. UTC | #2
On Sun, 23 Apr 2023 08:45:20 +0200, Greg KH <gregkh@linuxfoundation.org> wrote:
> On Sun, Apr 23, 2023 at 02:25:45PM +0800, Xuan Zhuo wrote:
> > The purpose of this patch is to allow driver pass the own dma callbacks
> > to xsk.
> >
> > This is to cope with the scene of virtio-net. If virtio does not have
> > VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use DMA API. In this case,
> > xsk cannot use DMA API directly to achieve DMA address. Based on this
> > scene, we must let xsk support driver to use the driver's dma callbacks.
>
> Why does virtio need to use dma?  That seems to go against the overall
> goal of virtio's new security restrictions that are being proposed
> (where they do NOT want it to use dma as it is not secure).

Sorry, I am not aware of that; could you give me a link?

But currently virtio/virtio-net uses the DMA API only when the feature
VIRTIO_F_ACCESS_PLATFORM is negotiated. Without VIRTIO_F_ACCESS_PLATFORM,
virtio (and therefore virtio-net) will not use the DMA API.
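
(For context, that decision looks roughly like this -- a conceptual sketch
only, not the actual drivers/virtio/virtio_ring.c code:)

#include <linux/dma-mapping.h>
#include <linux/virtio_config.h>

static dma_addr_t map_one_buf(struct virtio_device *vdev, struct device *dma_dev,
			      struct page *page, size_t offset, size_t len,
			      enum dma_data_direction dir)
{
	if (!virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM)) {
		/* device sees guest-physical addresses; DMA API is bypassed */
		return (dma_addr_t)(page_to_phys(page) + offset);
	}

	return dma_map_page(dma_dev, page, offset, len, dir);
}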

>
> And why is virtio special here?

The problem is that xsk always obtains DMA addresses through the DMA API,
but sometimes virtio-net cannot work with the DMA API.

> If you have access to a device, it
> should have all of the needed dma hooks already set up based on the
> bus it is on.  Or is the issue you don't have a real bus set up?  If so,
> why not fix that?

We tried, but it seems we cannot.
More:
   https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/

>
> > More is here:
> >     https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
> >     https://lore.kernel.org/all/20230421065059.1bc78133@kernel.org
>
> Am I missing the user of this new api?  Don't you need to submit that at
> the same time so we can at least see if this new api works properly?

The user will be virtio-net with AF_XDP support.

That is a huge patch set. Some of it is in the virtio core, some in net-dev.
An earlier version is at [1]; it has some differences, but not many.

	[1] http://lore.kernel.org/all/20230202110058.130695-1-xuanzhuo@linux.alibaba.com

I tried to split it into multiple patch sets.

Currently I plan to have several parts like this:

1. virtio core support premapped-dma, vq reset, expose dma device (virtio vhost branch)
2. virtio-net: refactor xdp (netdev branch)
3. virtio-net: support af-xdp (netdev branch)

But now #1 is blocked by this DMA question.

So, I want to complete this patch first.

>
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  include/net/xdp_sock_drv.h  | 20 ++++++++++-
> >  include/net/xsk_buff_pool.h | 19 ++++++++++
> >  net/xdp/xsk_buff_pool.c     | 71 +++++++++++++++++++++++++++----------
> >  3 files changed, 90 insertions(+), 20 deletions(-)
> >
> > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > index 9c0d860609ba..8b5284b272e4 100644
> > --- a/include/net/xdp_sock_drv.h
> > +++ b/include/net/xdp_sock_drv.h
> > @@ -67,7 +67,17 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> >  {
> >  	struct xdp_umem *umem = pool->umem;
> >
> > -	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
> > +	return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
> > +}
> > +
> > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > +					    struct device *dev,
> > +					    struct xsk_dma_cbs *dma_cbs,
> > +					    unsigned long attrs)
> > +{
> > +	struct xdp_umem *umem = pool->umem;
> > +
> > +	return xp_dma_map(pool, dev, dma_cbs, attrs, umem->pgs, umem->npgs);
> >  }
> >
> >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> > @@ -226,6 +236,14 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> >  	return 0;
> >  }
> >
> > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > +					    struct device *dev,
> > +					    struct xsk_dma_cbs *dma_cbs,
> > +					    unsigned long attrs)
> > +{
> > +	return 0;
> > +}
> > +
> >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> >  {
> >  	return 0;
> > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> > index 3e952e569418..2de88be9188b 100644
> > --- a/include/net/xsk_buff_pool.h
> > +++ b/include/net/xsk_buff_pool.h
> > @@ -43,6 +43,23 @@ struct xsk_dma_map {
> >  	bool dma_need_sync;
> >  };
> >
> > +struct xsk_dma_cbs {
> > +	dma_addr_t (*map_page)(struct device *dev, struct page *page,
>
> Why are you working on a "raw" struct device here and everywhere else?
> Are you sure that is ok?  What is it needed for?

I copy this from DMA APIs. For virtio that is not needed. But is there any
problems?


>
> > +			       size_t offset, size_t size,
> > +			       enum dma_data_direction dir, unsigned long attrs);
> > +	void (*unmap_page)(struct device *dev, dma_addr_t addr,
> > +			   size_t size, enum dma_data_direction dir,
> > +			   unsigned long attrs);
> > +	int (*mapping_error)(struct device *dev, dma_addr_t addr);
> > +	bool (*need_sync)(struct device *dev, dma_addr_t addr);
> > +	void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
> > +					  size_t offset, size_t size,
> > +					  enum dma_data_direction dir);
> > +	void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
> > +					     size_t offset, size_t size,
> > +					     enum dma_data_direction dir);
> > +};
>
> No documentation for any of these callbacks?  Please use kerneldoc so we
> at least have a clue as to what they should be doing.
>
> > +
> >  struct xsk_buff_pool {
> >  	/* Members only used in the control path first. */
> >  	struct device *dev;
> > @@ -85,6 +102,7 @@ struct xsk_buff_pool {
> >  	 * sockets share a single cq when the same netdev and queue id is shared.
> >  	 */
> >  	spinlock_t cq_lock;
> > +	struct xsk_dma_cbs *dma_cbs;
> >  	struct xdp_buff_xsk *free_heads[];
> >  };
> >
> > @@ -131,6 +149,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
> >  /* AF_XDP ZC drivers, via xdp_sock_buff.h */
> >  void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
> >  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
> > +	       struct xsk_dma_cbs *dma_cbs,
> >  	       unsigned long attrs, struct page **pages, u32 nr_pages);
> >  void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
> >  struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
> > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> > index b2df1e0f8153..e7e6c91f6e51 100644
> > --- a/net/xdp/xsk_buff_pool.c
> > +++ b/net/xdp/xsk_buff_pool.c
> > @@ -328,7 +328,8 @@ static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
> >  	kfree(dma_map);
> >  }
> >
> > -static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> > +static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
> > +			   struct xsk_dma_cbs *dma_cbs, unsigned long attrs)
> >  {
> >  	dma_addr_t *dma;
> >  	u32 i;
> > @@ -337,8 +338,12 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> >  		dma = &dma_map->dma_pages[i];
> >  		if (*dma) {
> >  			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
> > -			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
> > -					     DMA_BIDIRECTIONAL, attrs);
> > +			if (unlikely(dma_cbs))
>
> If you can not measure the use of likely/unlikely in a benchmark, then
> you should never use it as the compiler and CPU will work better without
> it (and will work better over time as hardware and compiler change).

Because in most cases dma_cbs is NULL for xsk, I used 'unlikely'.

Thanks.


>
> thanks,
>
> greg k-h
gregkh@linuxfoundation.org April 23, 2023, 8:39 a.m. UTC | #3
On Sun, Apr 23, 2023 at 02:58:36PM +0800, Xuan Zhuo wrote:
> On Sun, 23 Apr 2023 08:45:20 +0200, Greg KH <gregkh@linuxfoundation.org> wrote:
> > On Sun, Apr 23, 2023 at 02:25:45PM +0800, Xuan Zhuo wrote:
> > > The purpose of this patch is to allow driver pass the own dma callbacks
> > > to xsk.
> > >
> > > This is to cope with the scene of virtio-net. If virtio does not have
> > > VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use DMA API. In this case,
> > > xsk cannot use DMA API directly to achieve DMA address. Based on this
> > > scene, we must let xsk support driver to use the driver's dma callbacks.
> >
> > Why does virtio need to use dma?  That seems to go against the overall
> > goal of virtio's new security restrictions that are being proposed
> > (where they do NOT want it to use dma as it is not secure).
> 
> Sorry, I don't know that, could you give me one link?

Search for the "Confidential Computing" emails on lkml for an example of
this mess.

> But now, virtio-net/virtio will use dma api, when the feature VIRTIO_F_ACCESS_PLATFORM
> is got. If no VIRTIO_F_ACCESS_PLATFORM, the virtio(virtio-net) will not use DMA
> API.
> 
> >
> > And why is virtio special here?
> 
> The problem is that xsk always get dma by DMA APIs, but sometimes the
> virtio-net can not work with DMA APIs.
> 
> > If you have access to a device, it
> > should have all of the needed dma hooks already set up based on the
> > bus it is on.  Or is the issue you don't have a real bus set up?  If so,
> > why not fix that?
> 
> We tried, but that seams we can not.
> More:
>    https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/

So you are assuming a random virtual device with no dma backing bus is
allowed to do dma operations?  This feels wrong and should be fixed
somewhere so that the bus involved (you do have hardware here) is
properly exposed.

Please work with Christoph and others to solve this, don't try to route
around the issue.  Your patches are assuming that you have a device
pointer anyway, and that's on a bus, so it does have access somehow,
right?

> > > More is here:
> > >     https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
> > >     https://lore.kernel.org/all/20230421065059.1bc78133@kernel.org
> >
> > Am I missing the user of this new api?  Don't you need to submit that at
> > the same time so we can at least see if this new api works properly?
> 
> The user will is the virtio-net with supporting to AF_XDP.
> 
> That is a huge patch set. Some is in virtio core, some is in net-dev.
> An earlier version was [1] with some differences but not much.
> 
> 	[1] http://lore.kernel.org/all/20230202110058.130695-1-xuanzhuo@linux.alibaba.com
> 
> I tried to split to multi patch-set.
> 
> Currently I plan to have several parts like this:
> 
> 1. virtio core support premapped-dma, vq reset, expose dma device (virtio vhost branch)
> 2. virtio-net: refactor xdp (netdev branch)
> 3. virtio-net: support af-xdp (netdev branch)
> 
> But now, #1 is block by this dma question.
> 
> So, I want to complete this patch first.

As you know, we can never take apis without a real user for them,
because obviously those apis could be instantly removed from the system
because no one is using them.  We also do not know that the api is even
correct at all, without being able to see all of the users.

So make this a larger patch set please.  What would you do if you had to
review this without any prior information?  Make it easy for reviewers
please.

> >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  include/net/xdp_sock_drv.h  | 20 ++++++++++-
> > >  include/net/xsk_buff_pool.h | 19 ++++++++++
> > >  net/xdp/xsk_buff_pool.c     | 71 +++++++++++++++++++++++++++----------
> > >  3 files changed, 90 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > > index 9c0d860609ba..8b5284b272e4 100644
> > > --- a/include/net/xdp_sock_drv.h
> > > +++ b/include/net/xdp_sock_drv.h
> > > @@ -67,7 +67,17 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> > >  {
> > >  	struct xdp_umem *umem = pool->umem;
> > >
> > > -	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
> > > +	return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
> > > +}
> > > +
> > > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > > +					    struct device *dev,
> > > +					    struct xsk_dma_cbs *dma_cbs,
> > > +					    unsigned long attrs)
> > > +{
> > > +	struct xdp_umem *umem = pool->umem;
> > > +
> > > +	return xp_dma_map(pool, dev, dma_cbs, attrs, umem->pgs, umem->npgs);
> > >  }
> > >
> > >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> > > @@ -226,6 +236,14 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> > >  	return 0;
> > >  }
> > >
> > > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > > +					    struct device *dev,
> > > +					    struct xsk_dma_cbs *dma_cbs,
> > > +					    unsigned long attrs)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> > >  {
> > >  	return 0;
> > > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> > > index 3e952e569418..2de88be9188b 100644
> > > --- a/include/net/xsk_buff_pool.h
> > > +++ b/include/net/xsk_buff_pool.h
> > > @@ -43,6 +43,23 @@ struct xsk_dma_map {
> > >  	bool dma_need_sync;
> > >  };
> > >
> > > +struct xsk_dma_cbs {
> > > +	dma_addr_t (*map_page)(struct device *dev, struct page *page,
> >
> > Why are you working on a "raw" struct device here and everywhere else?
> > Are you sure that is ok?  What is it needed for?
> 
> I copy this from DMA APIs. For virtio that is not needed. But is there any
> problems?

See above, where are you getting this random device pointer from?
Eventually it does point to something on a bus with DMA access, right?
So why not use the right pointer for that type of device?

Or are you going to have devices of different types using this api?  If
so, where?  Again, this is hard to review without any real users.

> > > +			       size_t offset, size_t size,
> > > +			       enum dma_data_direction dir, unsigned long attrs);
> > > +	void (*unmap_page)(struct device *dev, dma_addr_t addr,
> > > +			   size_t size, enum dma_data_direction dir,
> > > +			   unsigned long attrs);
> > > +	int (*mapping_error)(struct device *dev, dma_addr_t addr);
> > > +	bool (*need_sync)(struct device *dev, dma_addr_t addr);
> > > +	void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
> > > +					  size_t offset, size_t size,
> > > +					  enum dma_data_direction dir);
> > > +	void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
> > > +					     size_t offset, size_t size,
> > > +					     enum dma_data_direction dir);
> > > +};
> >
> > No documentation for any of these callbacks?  Please use kerneldoc so we
> > at least have a clue as to what they should be doing.
> >
> > > +
> > >  struct xsk_buff_pool {
> > >  	/* Members only used in the control path first. */
> > >  	struct device *dev;
> > > @@ -85,6 +102,7 @@ struct xsk_buff_pool {
> > >  	 * sockets share a single cq when the same netdev and queue id is shared.
> > >  	 */
> > >  	spinlock_t cq_lock;
> > > +	struct xsk_dma_cbs *dma_cbs;
> > >  	struct xdp_buff_xsk *free_heads[];
> > >  };
> > >
> > > @@ -131,6 +149,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
> > >  /* AF_XDP ZC drivers, via xdp_sock_buff.h */
> > >  void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
> > >  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
> > > +	       struct xsk_dma_cbs *dma_cbs,
> > >  	       unsigned long attrs, struct page **pages, u32 nr_pages);
> > >  void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
> > >  struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
> > > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> > > index b2df1e0f8153..e7e6c91f6e51 100644
> > > --- a/net/xdp/xsk_buff_pool.c
> > > +++ b/net/xdp/xsk_buff_pool.c
> > > @@ -328,7 +328,8 @@ static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
> > >  	kfree(dma_map);
> > >  }
> > >
> > > -static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> > > +static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
> > > +			   struct xsk_dma_cbs *dma_cbs, unsigned long attrs)
> > >  {
> > >  	dma_addr_t *dma;
> > >  	u32 i;
> > > @@ -337,8 +338,12 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> > >  		dma = &dma_map->dma_pages[i];
> > >  		if (*dma) {
> > >  			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
> > > -			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
> > > -					     DMA_BIDIRECTIONAL, attrs);
> > > +			if (unlikely(dma_cbs))
> >
> > If you can not measure the use of likely/unlikely in a benchmark, then
> > you should never use it as the compiler and CPU will work better without
> > it (and will work better over time as hardware and compiler change).
> 
> Because in most cases the dma_cbs is null for xsk. So I use the 'unlikely'.

Again, prove that this is needed, otherwise do not use it.

thanks,

greg k-h
Xuan Zhuo April 23, 2023, 9:13 a.m. UTC | #4
On Sun, 23 Apr 2023 10:39:15 +0200, Greg KH <gregkh@linuxfoundation.org> wrote:
> On Sun, Apr 23, 2023 at 02:58:36PM +0800, Xuan Zhuo wrote:
> > On Sun, 23 Apr 2023 08:45:20 +0200, Greg KH <gregkh@linuxfoundation.org> wrote:
> > > On Sun, Apr 23, 2023 at 02:25:45PM +0800, Xuan Zhuo wrote:
> > > > The purpose of this patch is to allow driver pass the own dma callbacks
> > > > to xsk.
> > > >
> > > > This is to cope with the scene of virtio-net. If virtio does not have
> > > > VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use DMA API. In this case,
> > > > xsk cannot use DMA API directly to achieve DMA address. Based on this
> > > > scene, we must let xsk support driver to use the driver's dma callbacks.
> > >
> > > Why does virtio need to use dma?  That seems to go against the overall
> > > goal of virtio's new security restrictions that are being proposed
> > > (where they do NOT want it to use dma as it is not secure).
> >
> > Sorry, I don't know that, could you give me one link?
>
> Search for the "Confidential Computing" emails on lkml for an example of
> this mess.
>
> > But now, virtio-net/virtio will use dma api, when the feature VIRTIO_F_ACCESS_PLATFORM
> > is got. If no VIRTIO_F_ACCESS_PLATFORM, the virtio(virtio-net) will not use DMA
> > API.
> >
> > >
> > > And why is virtio special here?
> >
> > The problem is that xsk always get dma by DMA APIs, but sometimes the
> > virtio-net can not work with DMA APIs.
> >
> > > If you have access to a device, it
> > > should have all of the needed dma hooks already set up based on the
> > > bus it is on.  Or is the issue you don't have a real bus set up?  If so,
> > > why not fix that?
> >
> > We tried, but that seams we can not.
> > More:
> >    https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
>
> So you are assuming a random virtual device with no dma backing bus is
> allowed to do dma operations?  This feels wrong and should be fixed
> somewhere so that the bus involved (you do have hardware here), is
> properly exposed.

YES.

The conclusion of the discussion with Christoph is that we have no way to get
the DMA address directly from the virtio device or the virtio-pci device via
the DMA API. That is why this patch tries to make xsk avoid the DMA API in
some cases.

>
> Please work with Christoph and others to solve this, don't try to route
> around the issue.  Your patches are assuming that you have a device
> pointer anyway, and that's on a bus, so it does have access somehow,
> right?

NO.

Sorry, this causes trouble for reviewers who don't know the context.

Here is the code inside virtio-net that uses these callbacks.

+static dma_addr_t virtnet_xsk_dma_map_page(struct device *dev, struct page *page,
+                                          size_t offset, size_t size,
+                                          enum dma_data_direction dir, unsigned long attrs)
+{
+       return (dma_addr_t)virt_to_phys(page_address(page) + offset);
+}
+
+static void virtnet_xsk_dma_unmap_page(struct device *dev, dma_addr_t addr,
+                                      size_t size, enum dma_data_direction dir,
+                                      unsigned long attrs)
+{
+}
+
+static int virtnet_xsk_dma_mapping_error(struct device *dev, dma_addr_t addr)
+{
+       return 0;
+}
+
+static bool virtnet_xsk_dma_need_sync(struct device *dev, dma_addr_t addr)
+{
+       return false;
+}
+
+static void virtnet_xsk_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+                                                     size_t offset, size_t size,
+                                                     enum dma_data_direction dir)
+{
+}
+
+static void virtnet_xsk_dma_sync_single_range_for_device(struct device *dev, dma_addr_t addr,
+                                                        size_t offset, size_t size,
+                                                        enum dma_data_direction dir)
+{
+}
+
+static struct xsk_dma_cbs virtnet_xsk_dma_cbs = {
+       .map_page = virtnet_xsk_dma_map_page,
+       .unmap_page = virtnet_xsk_dma_unmap_page,
+       .mapping_error = virtnet_xsk_dma_mapping_error,
+       .need_sync = virtnet_xsk_dma_need_sync,
+       .sync_single_range_for_cpu = virtnet_xsk_dma_sync_single_range_for_cpu,
+       .sync_single_range_for_device = virtnet_xsk_dma_sync_single_range_for_device,
+};
+

The device parameter is included in the interface only to keep the signatures
compatible with the DMA API; there is not necessarily a device behind it at all.
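
In the (not yet posted) virtio-net AF_XDP series these callbacks would then be
registered roughly like this -- a sketch with hypothetical names, not the
final code:

static int virtnet_xsk_pool_enable(struct virtnet_info *vi,
				   struct xsk_buff_pool *pool)
{
	/* The struct device argument is only carried along for DMA-API
	 * signature compatibility; the callbacks above never dereference it.
	 */
	return xsk_pool_dma_map_with_cbs(pool, &vi->vdev->dev,
					 &virtnet_xsk_dma_cbs, 0);
}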

>
> > > > More is here:
> > > >     https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo@linux.alibaba.com/
> > > >     https://lore.kernel.org/all/20230421065059.1bc78133@kernel.org
> > >
> > > Am I missing the user of this new api?  Don't you need to submit that at
> > > the same time so we can at least see if this new api works properly?
> >
> > The user will is the virtio-net with supporting to AF_XDP.
> >
> > That is a huge patch set. Some is in virtio core, some is in net-dev.
> > An earlier version was [1] with some differences but not much.
> >
> > 	[1] http://lore.kernel.org/all/20230202110058.130695-1-xuanzhuo@linux.alibaba.com
> >
> > I tried to split to multi patch-set.
> >
> > Currently I plan to have several parts like this:
> >
> > 1. virtio core support premapped-dma, vq reset, expose dma device (virtio vhost branch)
> > 2. virtio-net: refactor xdp (netdev branch)
> > 3. virtio-net: support af-xdp (netdev branch)
> >
> > But now, #1 is block by this dma question.
> >
> > So, I want to complete this patch first.
>
> As you know, we can never take apis without a real user for them,
> because obviously those apis could be instantly removed from the system
> because no one is using them.  We also do not know that the api is even
> correct at all, without being able to see all of the users.

We do not have to merge this patch now; let's complete the discussion of this
issue first. For the next version, I can change it to an RFC.

Thanks.

>
> So make this a larger patch set please.  What would you do if you had to
> review this without any prior information?  Make it easy for reviewers
> please.
>
> > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  include/net/xdp_sock_drv.h  | 20 ++++++++++-
> > > >  include/net/xsk_buff_pool.h | 19 ++++++++++
> > > >  net/xdp/xsk_buff_pool.c     | 71 +++++++++++++++++++++++++++----------
> > > >  3 files changed, 90 insertions(+), 20 deletions(-)
> > > >
> > > > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > > > index 9c0d860609ba..8b5284b272e4 100644
> > > > --- a/include/net/xdp_sock_drv.h
> > > > +++ b/include/net/xdp_sock_drv.h
> > > > @@ -67,7 +67,17 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> > > >  {
> > > >  	struct xdp_umem *umem = pool->umem;
> > > >
> > > > -	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
> > > > +	return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
> > > > +}
> > > > +
> > > > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > > > +					    struct device *dev,
> > > > +					    struct xsk_dma_cbs *dma_cbs,
> > > > +					    unsigned long attrs)
> > > > +{
> > > > +	struct xdp_umem *umem = pool->umem;
> > > > +
> > > > +	return xp_dma_map(pool, dev, dma_cbs, attrs, umem->pgs, umem->npgs);
> > > >  }
> > > >
> > > >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> > > > @@ -226,6 +236,14 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
> > > >  	return 0;
> > > >  }
> > > >
> > > > +static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
> > > > +					    struct device *dev,
> > > > +					    struct xsk_dma_cbs *dma_cbs,
> > > > +					    unsigned long attrs)
> > > > +{
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
> > > >  {
> > > >  	return 0;
> > > > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> > > > index 3e952e569418..2de88be9188b 100644
> > > > --- a/include/net/xsk_buff_pool.h
> > > > +++ b/include/net/xsk_buff_pool.h
> > > > @@ -43,6 +43,23 @@ struct xsk_dma_map {
> > > >  	bool dma_need_sync;
> > > >  };
> > > >
> > > > +struct xsk_dma_cbs {
> > > > +	dma_addr_t (*map_page)(struct device *dev, struct page *page,
> > >
> > > Why are you working on a "raw" struct device here and everywhere else?
> > > Are you sure that is ok?  What is it needed for?
> >
> > I copy this from DMA APIs. For virtio that is not needed. But is there any
> > problems?
>
> See above, where are you getting this random device pointer from?
> Eventually it does point to something on a bus with DMA access, right?
> So why not use the right pointer for that type of device?
>
> Or are you going to have devices of different types using this api?  If
> so, where?  Again, this is hard to review without any real users.
>
> > > > +			       size_t offset, size_t size,
> > > > +			       enum dma_data_direction dir, unsigned long attrs);
> > > > +	void (*unmap_page)(struct device *dev, dma_addr_t addr,
> > > > +			   size_t size, enum dma_data_direction dir,
> > > > +			   unsigned long attrs);
> > > > +	int (*mapping_error)(struct device *dev, dma_addr_t addr);
> > > > +	bool (*need_sync)(struct device *dev, dma_addr_t addr);
> > > > +	void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
> > > > +					  size_t offset, size_t size,
> > > > +					  enum dma_data_direction dir);
> > > > +	void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
> > > > +					     size_t offset, size_t size,
> > > > +					     enum dma_data_direction dir);
> > > > +};
> > >
> > > No documentation for any of these callbacks?  Please use kerneldoc so we
> > > at least have a clue as to what they should be doing.
> > >
> > > > +
> > > >  struct xsk_buff_pool {
> > > >  	/* Members only used in the control path first. */
> > > >  	struct device *dev;
> > > > @@ -85,6 +102,7 @@ struct xsk_buff_pool {
> > > >  	 * sockets share a single cq when the same netdev and queue id is shared.
> > > >  	 */
> > > >  	spinlock_t cq_lock;
> > > > +	struct xsk_dma_cbs *dma_cbs;
> > > >  	struct xdp_buff_xsk *free_heads[];
> > > >  };
> > > >
> > > > @@ -131,6 +149,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
> > > >  /* AF_XDP ZC drivers, via xdp_sock_buff.h */
> > > >  void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
> > > >  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
> > > > +	       struct xsk_dma_cbs *dma_cbs,
> > > >  	       unsigned long attrs, struct page **pages, u32 nr_pages);
> > > >  void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
> > > >  struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
> > > > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> > > > index b2df1e0f8153..e7e6c91f6e51 100644
> > > > --- a/net/xdp/xsk_buff_pool.c
> > > > +++ b/net/xdp/xsk_buff_pool.c
> > > > @@ -328,7 +328,8 @@ static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
> > > >  	kfree(dma_map);
> > > >  }
> > > >
> > > > -static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> > > > +static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
> > > > +			   struct xsk_dma_cbs *dma_cbs, unsigned long attrs)
> > > >  {
> > > >  	dma_addr_t *dma;
> > > >  	u32 i;
> > > > @@ -337,8 +338,12 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
> > > >  		dma = &dma_map->dma_pages[i];
> > > >  		if (*dma) {
> > > >  			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
> > > > -			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
> > > > -					     DMA_BIDIRECTIONAL, attrs);
> > > > +			if (unlikely(dma_cbs))
> > >
> > > If you can not measure the use of likely/unlikely in a benchmark, then
> > > you should never use it as the compiler and CPU will work better without
> > > it (and will work better over time as hardware and compiler change).
> >
> > Because in most cases the dma_cbs is null for xsk. So I use the 'unlikely'.
>
> Again, prove that this is needed, otherwise do not use it.
>
> thanks,
>
> greg k-h
Michael S. Tsirkin April 23, 2023, 8:42 p.m. UTC | #5
On Sun, Apr 23, 2023 at 08:45:20AM +0200, Greg KH wrote:
> On Sun, Apr 23, 2023 at 02:25:45PM +0800, Xuan Zhuo wrote:
> > The purpose of this patch is to allow driver pass the own dma callbacks
> > to xsk.
> > 
> > This is to cope with the scene of virtio-net. If virtio does not have
> > VIRTIO_F_ACCESS_PLATFORM, then virtio cannot use DMA API. In this case,
> > xsk cannot use DMA API directly to achieve DMA address. Based on this
> > scene, we must let xsk support driver to use the driver's dma callbacks.
> 
> Why does virtio need to use dma?  That seems to go against the overall
> goal of virtio's new security restrictions that are being proposed
> (where they do NOT want it to use dma as it is not secure).

Yes, they do use DMA, specifically DMA into a bounce buffer.

Patch

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 9c0d860609ba..8b5284b272e4 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -67,7 +67,17 @@  static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
 {
 	struct xdp_umem *umem = pool->umem;
 
-	return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
+	return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
+}
+
+static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
+					    struct device *dev,
+					    struct xsk_dma_cbs *dma_cbs,
+					    unsigned long attrs)
+{
+	struct xdp_umem *umem = pool->umem;
+
+	return xp_dma_map(pool, dev, dma_cbs, attrs, umem->pgs, umem->npgs);
 }
 
 static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
@@ -226,6 +236,14 @@  static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
 	return 0;
 }
 
+static inline int xsk_pool_dma_map_with_cbs(struct xsk_buff_pool *pool,
+					    struct device *dev,
+					    struct xsk_dma_cbs *dma_cbs,
+					    unsigned long attrs)
+{
+	return 0;
+}
+
 static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
 {
 	return 0;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..2de88be9188b 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -43,6 +43,23 @@  struct xsk_dma_map {
 	bool dma_need_sync;
 };
 
+struct xsk_dma_cbs {
+	dma_addr_t (*map_page)(struct device *dev, struct page *page,
+			       size_t offset, size_t size,
+			       enum dma_data_direction dir, unsigned long attrs);
+	void (*unmap_page)(struct device *dev, dma_addr_t addr,
+			   size_t size, enum dma_data_direction dir,
+			   unsigned long attrs);
+	int (*mapping_error)(struct device *dev, dma_addr_t addr);
+	bool (*need_sync)(struct device *dev, dma_addr_t addr);
+	void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
+					  size_t offset, size_t size,
+					  enum dma_data_direction dir);
+	void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
+					     size_t offset, size_t size,
+					     enum dma_data_direction dir);
+};
+
 struct xsk_buff_pool {
 	/* Members only used in the control path first. */
 	struct device *dev;
@@ -85,6 +102,7 @@  struct xsk_buff_pool {
 	 * sockets share a single cq when the same netdev and queue id is shared.
 	 */
 	spinlock_t cq_lock;
+	struct xsk_dma_cbs *dma_cbs;
 	struct xdp_buff_xsk *free_heads[];
 };
 
@@ -131,6 +149,7 @@  static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
 /* AF_XDP ZC drivers, via xdp_sock_buff.h */
 void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+	       struct xsk_dma_cbs *dma_cbs,
 	       unsigned long attrs, struct page **pages, u32 nr_pages);
 void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
 struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b2df1e0f8153..e7e6c91f6e51 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -328,7 +328,8 @@  static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
 	kfree(dma_map);
 }
 
-static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
+static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
+			   struct xsk_dma_cbs *dma_cbs, unsigned long attrs)
 {
 	dma_addr_t *dma;
 	u32 i;
@@ -337,8 +338,12 @@  static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
 		dma = &dma_map->dma_pages[i];
 		if (*dma) {
 			*dma &= ~XSK_NEXT_PG_CONTIG_MASK;
-			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
-					     DMA_BIDIRECTIONAL, attrs);
+			if (unlikely(dma_cbs))
+				dma_cbs->unmap_page(dma_map->dev, *dma, PAGE_SIZE,
+						    DMA_BIDIRECTIONAL, attrs);
+			else
+				dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
+						     DMA_BIDIRECTIONAL, attrs);
 			*dma = 0;
 		}
 	}
@@ -362,7 +367,7 @@  void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
 	if (!refcount_dec_and_test(&dma_map->users))
 		return;
 
-	__xp_dma_unmap(dma_map, attrs);
+	__xp_dma_unmap(dma_map, pool->dma_cbs, attrs);
 	kvfree(pool->dma_pages);
 	pool->dma_pages_cnt = 0;
 	pool->dev = NULL;
@@ -407,6 +412,7 @@  static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
 }
 
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+	       struct xsk_dma_cbs *dma_cbs,
 	       unsigned long attrs, struct page **pages, u32 nr_pages)
 {
 	struct xsk_dma_map *dma_map;
@@ -424,19 +430,32 @@  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 		return 0;
 	}
 
+	pool->dma_cbs = dma_cbs;
+
 	dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
 	if (!dma_map)
 		return -ENOMEM;
 
 	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
-		dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
-					 DMA_BIDIRECTIONAL, attrs);
-		if (dma_mapping_error(dev, dma)) {
-			__xp_dma_unmap(dma_map, attrs);
-			return -ENOMEM;
+		if (likely(!dma_cbs)) {
+			dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+						 DMA_BIDIRECTIONAL, attrs);
+			if (dma_mapping_error(dev, dma)) {
+				__xp_dma_unmap(dma_map, dma_cbs, attrs);
+				return -ENOMEM;
+			}
+			if (dma_need_sync(dev, dma))
+				dma_map->dma_need_sync = true;
+		} else {
+			dma = dma_cbs->map_page(dev, pages[i], 0, PAGE_SIZE,
+						DMA_BIDIRECTIONAL, attrs);
+			if (dma_cbs->mapping_error(dev, dma)) {
+				__xp_dma_unmap(dma_map, dma_cbs, attrs);
+				return -ENOMEM;
+			}
+			if (dma_cbs->need_sync(dev, dma))
+				dma_map->dma_need_sync = true;
 		}
-		if (dma_need_sync(dev, dma))
-			dma_map->dma_need_sync = true;
 		dma_map->dma_pages[i] = dma;
 	}
 
@@ -445,7 +464,7 @@  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 
 	err = xp_init_dma_info(pool, dma_map);
 	if (err) {
-		__xp_dma_unmap(dma_map, attrs);
+		__xp_dma_unmap(dma_map, dma_cbs, attrs);
 		return err;
 	}
 
@@ -532,9 +551,14 @@  struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 	xskb->xdp.data_meta = xskb->xdp.data;
 
 	if (pool->dma_need_sync) {
-		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
-						 pool->frame_len,
-						 DMA_BIDIRECTIONAL);
+		if (unlikely(pool->dma_cbs))
+			pool->dma_cbs->sync_single_range_for_device(pool->dev, xskb->dma, 0,
+								    pool->frame_len,
+								    DMA_BIDIRECTIONAL);
+		else
+			dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
+							 pool->frame_len,
+							 DMA_BIDIRECTIONAL);
 	}
 	return &xskb->xdp;
 }
@@ -670,15 +694,24 @@  EXPORT_SYMBOL(xp_raw_get_dma);
 
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
 {
-	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
-				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+	if (unlikely(xskb->pool->dma_cbs))
+		xskb->pool->dma_cbs->sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
+							       xskb->pool->frame_len,
+							       DMA_BIDIRECTIONAL);
+	else
+		dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
+					      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
 
 void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
 				 size_t size)
 {
-	dma_sync_single_range_for_device(pool->dev, dma, 0,
-					 size, DMA_BIDIRECTIONAL);
+	if (unlikely(pool->dma_cbs))
+		pool->dma_cbs->sync_single_range_for_device(pool->dev, dma, 0,
+							    size, DMA_BIDIRECTIONAL);
+	else
+		dma_sync_single_range_for_device(pool->dev, dma, 0,
+						 size, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_device_slow);