diff mbox series

[net-next,v6,05/10] virtio_net: xsk: bind/unbind xsk for rx

Message ID 20240618075643.24867-6-xuanzhuo@linux.alibaba.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series virtio-net: support AF_XDP zero copy | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 845 this patch: 845
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang success Errors and warnings before: 849 this patch: 849
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 849 this patch: 849
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo June 18, 2024, 7:56 a.m. UTC
This patch implement the logic of bind/unbind xsk pool to rq.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 133 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

Comments

Jason Wang June 28, 2024, 2:19 a.m. UTC | #1
On Tue, Jun 18, 2024 at 3:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> This patch implement the logic of bind/unbind xsk pool to rq.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/net/virtio_net.c | 133 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 133 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index df885cdbe658..d8cce143be26 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -25,6 +25,7 @@
>  #include <net/net_failover.h>
>  #include <net/netdev_rx_queue.h>
>  #include <net/netdev_queues.h>
> +#include <net/xdp_sock_drv.h>
>
>  static int napi_weight = NAPI_POLL_WEIGHT;
>  module_param(napi_weight, int, 0444);
> @@ -348,6 +349,13 @@ struct receive_queue {
>
>         /* Record the last dma info to free after new pages is allocated. */
>         struct virtnet_rq_dma *last_dma;
> +
> +       struct {
> +               struct xsk_buff_pool *pool;
> +
> +               /* xdp rxq used by xsk */
> +               struct xdp_rxq_info xdp_rxq;
> +       } xsk;

I don't see a special reason for having a container struct here.


>  };
>
>  /* This structure can contain rss message with maximum settings for indirection table and keysize
> @@ -4970,6 +4978,129 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
>         return virtnet_set_guest_offloads(vi, offloads);
>  }
>
> +static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
> +                                   struct xsk_buff_pool *pool)
> +{
> +       int err, qindex;
> +
> +       qindex = rq - vi->rq;
> +
> +       if (pool) {
> +               err = xdp_rxq_info_reg(&rq->xsk.xdp_rxq, vi->dev, qindex, rq->napi.napi_id);
> +               if (err < 0)
> +                       return err;
> +
> +               err = xdp_rxq_info_reg_mem_model(&rq->xsk.xdp_rxq,
> +                                                MEM_TYPE_XSK_BUFF_POOL, NULL);
> +               if (err < 0)
> +                       goto unreg;
> +
> +               xsk_pool_set_rxq_info(pool, &rq->xsk.xdp_rxq);
> +       }
> +
> +       virtnet_rx_pause(vi, rq);
> +
> +       err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
> +       if (err) {
> +               netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
> +
> +               pool = NULL;
> +       }
> +
> +       rq->xsk.pool = pool;
> +
> +       virtnet_rx_resume(vi, rq);
> +
> +       if (pool)
> +               return 0;
> +
> +unreg:
> +       xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
> +       return err;
> +}
> +
> +static int virtnet_xsk_pool_enable(struct net_device *dev,
> +                                  struct xsk_buff_pool *pool,
> +                                  u16 qid)
> +{
> +       struct virtnet_info *vi = netdev_priv(dev);
> +       struct receive_queue *rq;
> +       struct device *dma_dev;
> +       struct send_queue *sq;
> +       int err;
> +
> +       /* In big_packets mode, xdp cannot work, so there is no need to
> +        * initialize xsk of rq.
> +        */
> +       if (vi->big_packets && !vi->mergeable_rx_bufs)
> +               return -ENOENT;
> +
> +       if (qid >= vi->curr_queue_pairs)
> +               return -EINVAL;
> +
> +       sq = &vi->sq[qid];
> +       rq = &vi->rq[qid];
> +
> +       /* For the xsk, the tx and rx should have the same device. The af-xdp
> +        * may use one buffer to receive from the rx and reuse this buffer to
> +        * send by the tx. So the dma dev of sq and rq should be the same one.
> +        *
> +        * But vq->dma_dev allows every vq has the respective dma dev. So I
> +        * check the dma dev of vq and sq is the same dev.

Not a native speaker, but it might be better to say "xsk assumes ....
to be the same device". And it might be better to replace "should"
with "must".

Others look good.

Thanks
Xuan Zhuo June 28, 2024, 5:42 a.m. UTC | #2
On Fri, 28 Jun 2024 10:19:34 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Tue, Jun 18, 2024 at 3:57 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > This patch implement the logic of bind/unbind xsk pool to rq.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/net/virtio_net.c | 133 +++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 133 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index df885cdbe658..d8cce143be26 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -25,6 +25,7 @@
> >  #include <net/net_failover.h>
> >  #include <net/netdev_rx_queue.h>
> >  #include <net/netdev_queues.h>
> > +#include <net/xdp_sock_drv.h>
> >
> >  static int napi_weight = NAPI_POLL_WEIGHT;
> >  module_param(napi_weight, int, 0444);
> > @@ -348,6 +349,13 @@ struct receive_queue {
> >
> >         /* Record the last dma info to free after new pages is allocated. */
> >         struct virtnet_rq_dma *last_dma;
> > +
> > +       struct {
> > +               struct xsk_buff_pool *pool;
> > +
> > +               /* xdp rxq used by xsk */
> > +               struct xdp_rxq_info xdp_rxq;
> > +       } xsk;
>
> I don't see a special reason for having a container struct here.


Will fix.

>
>
> >  };
> >
> >  /* This structure can contain rss message with maximum settings for indirection table and keysize
> > @@ -4970,6 +4978,129 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> >         return virtnet_set_guest_offloads(vi, offloads);
> >  }
> >
> > +static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
> > +                                   struct xsk_buff_pool *pool)
> > +{
> > +       int err, qindex;
> > +
> > +       qindex = rq - vi->rq;
> > +
> > +       if (pool) {
> > +               err = xdp_rxq_info_reg(&rq->xsk.xdp_rxq, vi->dev, qindex, rq->napi.napi_id);
> > +               if (err < 0)
> > +                       return err;
> > +
> > +               err = xdp_rxq_info_reg_mem_model(&rq->xsk.xdp_rxq,
> > +                                                MEM_TYPE_XSK_BUFF_POOL, NULL);
> > +               if (err < 0)
> > +                       goto unreg;
> > +
> > +               xsk_pool_set_rxq_info(pool, &rq->xsk.xdp_rxq);
> > +       }
> > +
> > +       virtnet_rx_pause(vi, rq);
> > +
> > +       err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
> > +       if (err) {
> > +               netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
> > +
> > +               pool = NULL;
> > +       }
> > +
> > +       rq->xsk.pool = pool;
> > +
> > +       virtnet_rx_resume(vi, rq);
> > +
> > +       if (pool)
> > +               return 0;
> > +
> > +unreg:
> > +       xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
> > +       return err;
> > +}
> > +
> > +static int virtnet_xsk_pool_enable(struct net_device *dev,
> > +                                  struct xsk_buff_pool *pool,
> > +                                  u16 qid)
> > +{
> > +       struct virtnet_info *vi = netdev_priv(dev);
> > +       struct receive_queue *rq;
> > +       struct device *dma_dev;
> > +       struct send_queue *sq;
> > +       int err;
> > +
> > +       /* In big_packets mode, xdp cannot work, so there is no need to
> > +        * initialize xsk of rq.
> > +        */
> > +       if (vi->big_packets && !vi->mergeable_rx_bufs)
> > +               return -ENOENT;
> > +
> > +       if (qid >= vi->curr_queue_pairs)
> > +               return -EINVAL;
> > +
> > +       sq = &vi->sq[qid];
> > +       rq = &vi->rq[qid];
> > +
> > +       /* For the xsk, the tx and rx should have the same device. The af-xdp
> > +        * may use one buffer to receive from the rx and reuse this buffer to
> > +        * send by the tx. So the dma dev of sq and rq should be the same one.
> > +        *
> > +        * But vq->dma_dev allows every vq has the respective dma dev. So I
> > +        * check the dma dev of vq and sq is the same dev.
>
> Not a native speaker, but it might be better to say "xsk assumes ....
> to be the same device". And it might be better to replace "should"
> with "must".

Will fix.

Thanks.


>
> Others look good.
>
> Thanks
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index df885cdbe658..d8cce143be26 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -25,6 +25,7 @@ 
 #include <net/net_failover.h>
 #include <net/netdev_rx_queue.h>
 #include <net/netdev_queues.h>
+#include <net/xdp_sock_drv.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -348,6 +349,13 @@  struct receive_queue {
 
 	/* Record the last dma info to free after new pages is allocated. */
 	struct virtnet_rq_dma *last_dma;
+
+	struct {
+		struct xsk_buff_pool *pool;
+
+		/* xdp rxq used by xsk */
+		struct xdp_rxq_info xdp_rxq;
+	} xsk;
 };
 
 /* This structure can contain rss message with maximum settings for indirection table and keysize
@@ -4970,6 +4978,129 @@  static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
 	return virtnet_set_guest_offloads(vi, offloads);
 }
 
+static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queue *rq,
+				    struct xsk_buff_pool *pool)
+{
+	int err, qindex;
+
+	qindex = rq - vi->rq;
+
+	if (pool) {
+		err = xdp_rxq_info_reg(&rq->xsk.xdp_rxq, vi->dev, qindex, rq->napi.napi_id);
+		if (err < 0)
+			return err;
+
+		err = xdp_rxq_info_reg_mem_model(&rq->xsk.xdp_rxq,
+						 MEM_TYPE_XSK_BUFF_POOL, NULL);
+		if (err < 0)
+			goto unreg;
+
+		xsk_pool_set_rxq_info(pool, &rq->xsk.xdp_rxq);
+	}
+
+	virtnet_rx_pause(vi, rq);
+
+	err = virtqueue_reset(rq->vq, virtnet_rq_unmap_free_buf);
+	if (err) {
+		netdev_err(vi->dev, "reset rx fail: rx queue index: %d err: %d\n", qindex, err);
+
+		pool = NULL;
+	}
+
+	rq->xsk.pool = pool;
+
+	virtnet_rx_resume(vi, rq);
+
+	if (pool)
+		return 0;
+
+unreg:
+	xdp_rxq_info_unreg(&rq->xsk.xdp_rxq);
+	return err;
+}
+
+static int virtnet_xsk_pool_enable(struct net_device *dev,
+				   struct xsk_buff_pool *pool,
+				   u16 qid)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct receive_queue *rq;
+	struct device *dma_dev;
+	struct send_queue *sq;
+	int err;
+
+	/* In big_packets mode, xdp cannot work, so there is no need to
+	 * initialize xsk of rq.
+	 */
+	if (vi->big_packets && !vi->mergeable_rx_bufs)
+		return -ENOENT;
+
+	if (qid >= vi->curr_queue_pairs)
+		return -EINVAL;
+
+	sq = &vi->sq[qid];
+	rq = &vi->rq[qid];
+
+	/* For the xsk, the tx and rx should have the same device. The af-xdp
+	 * may use one buffer to receive from the rx and reuse this buffer to
+	 * send by the tx. So the dma dev of sq and rq should be the same one.
+	 *
+	 * But vq->dma_dev allows every vq has the respective dma dev. So I
+	 * check the dma dev of vq and sq is the same dev.
+	 */
+	if (virtqueue_dma_dev(rq->vq) != virtqueue_dma_dev(sq->vq))
+		return -EPERM;
+
+	dma_dev = virtqueue_dma_dev(rq->vq);
+	if (!dma_dev)
+		return -EPERM;
+
+	err = xsk_pool_dma_map(pool, dma_dev, 0);
+	if (err)
+		goto err_xsk_map;
+
+	err = virtnet_rq_bind_xsk_pool(vi, rq, pool);
+	if (err)
+		goto err_rq;
+
+	return 0;
+
+err_rq:
+	xsk_pool_dma_unmap(pool, 0);
+err_xsk_map:
+	return err;
+}
+
+static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct xsk_buff_pool *pool;
+	struct receive_queue *rq;
+	int err;
+
+	if (qid >= vi->curr_queue_pairs)
+		return -EINVAL;
+
+	rq = &vi->rq[qid];
+
+	pool = rq->xsk.pool;
+
+	err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
+
+	xsk_pool_dma_unmap(pool, 0);
+
+	return err;
+}
+
+static int virtnet_xsk_pool_setup(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	if (xdp->xsk.pool)
+		return virtnet_xsk_pool_enable(dev, xdp->xsk.pool,
+					       xdp->xsk.queue_id);
+	else
+		return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id);
+}
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			   struct netlink_ext_ack *extack)
 {
@@ -5095,6 +5226,8 @@  static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		return virtnet_xsk_pool_setup(dev, xdp);
 	default:
 		return -EINVAL;
 	}