
[net-next,v7,09/10] virtio_net: xsk: rx: support recv small mode

Message ID 20240705073734.93905-10-xuanzhuo@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series virtio-net: support AF_XDP zero copy

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 816 this patch: 816
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang success Errors and warnings before: 821 this patch: 821
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 821 this patch: 821
netdev/checkpatch warning WARNING: line length exceeds 80 columns (lines of 83, 84, 85, 86, 87, 96 and 97 columns)
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-05--15-00 (tests: 694)

Commit Message

Xuan Zhuo July 5, 2024, 7:37 a.m. UTC
In the xsk receive process:
1. We may need to copy data to create an skb for XDP_PASS.
2. We may need to call xsk_buff_free() to release the buffer.
3. The handle for the xdp_buff is different from the normal buffer.

If we pushed this logic into the existing receive handlers (merge and small),
we would have to maintain code scattered across merge and small (and big).
So I think it is a better choice to put the xsk code into an
independent function.
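
For orientation, the dispatch this patch introduces can be condensed from the
diff below (refill and stats handling omitted here):

static int virtnet_receive(struct receive_queue *rq, int budget,
			   unsigned int *xdp_xmit)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct virtnet_rq_stats stats = {};
	int packets;

	if (rq->xsk_pool)
		/* xsk path: buffers are xdp_buffs from the xsk pool */
		packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
	else
		/* existing path: small/mergeable/big handlers */
		packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);

	/* try_fill_recv() and the stats update stay as before, see the diff */
	return packets;
}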

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---

v7:
   1. rename xdp_construct_skb to xsk_construct_skb
   2. refactor virtnet_receive()

 drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 168 insertions(+), 8 deletions(-)

--
2.32.0.3.g01195cf9f

Comments

Jason Wang July 8, 2024, 7 a.m. UTC | #1
On Fri, Jul 5, 2024 at 3:38 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> In the process:
> 1. We may need to copy data to create skb for XDP_PASS.
> 2. We may need to call xsk_buff_free() to release the buffer.
> 3. The handle for xdp_buff is difference from the buffer.
>
> If we pushed this logic into existing receive handle(merge and small),
> we would have to maintain code scattered inside merge and small (and big).
> So I think it is a good choice for us to put the xsk code into an
> independent function.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>
> v7:
>    1. rename xdp_construct_skb to xsk_construct_skb
>    2. refactor virtnet_receive()
>
>  drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 168 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 2b27f5ada64a..64d8cd481890 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -498,6 +498,12 @@ struct virtio_net_common_hdr {
>  };
>
>  static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> +static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
> +                              struct net_device *dev,
> +                              unsigned int *xdp_xmit,
> +                              struct virtnet_rq_stats *stats);
> +static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
> +                                struct sk_buff *skb, u8 flags);
>
>  static bool is_xdp_frame(void *ptr)
>  {
> @@ -1062,6 +1068,124 @@ static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
>         sg->length = len;
>  }
>
> +static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
> +                                  struct receive_queue *rq, void *buf, u32 len)
> +{
> +       struct xdp_buff *xdp;
> +       u32 bufsize;
> +
> +       xdp = (struct xdp_buff *)buf;
> +
> +       bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
> +
> +       if (unlikely(len > bufsize)) {
> +               pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> +                        vi->dev->name, len, bufsize);
> +               DEV_STATS_INC(vi->dev, rx_length_errors);
> +               xsk_buff_free(xdp);
> +               return NULL;
> +       }
> +
> +       xsk_buff_set_size(xdp, len);
> +       xsk_buff_dma_sync_for_cpu(xdp);
> +
> +       return xdp;
> +}
> +
> +static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
> +                                        struct xdp_buff *xdp)
> +{
> +       unsigned int metasize = xdp->data - xdp->data_meta;
> +       struct sk_buff *skb;
> +       unsigned int size;
> +
> +       size = xdp->data_end - xdp->data_hard_start;
> +       skb = napi_alloc_skb(&rq->napi, size);
> +       if (unlikely(!skb)) {
> +               xsk_buff_free(xdp);
> +               return NULL;
> +       }
> +
> +       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
> +
> +       size = xdp->data_end - xdp->data_meta;
> +       memcpy(__skb_put(skb, size), xdp->data_meta, size);
> +
> +       if (metasize) {
> +               __skb_pull(skb, metasize);
> +               skb_metadata_set(skb, metasize);
> +       }
> +
> +       xsk_buff_free(xdp);
> +
> +       return skb;
> +}
> +
> +static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
> +                                                struct receive_queue *rq, struct xdp_buff *xdp,
> +                                                unsigned int *xdp_xmit,
> +                                                struct virtnet_rq_stats *stats)
> +{
> +       struct bpf_prog *prog;
> +       u32 ret;
> +
> +       ret = XDP_PASS;
> +       rcu_read_lock();
> +       prog = rcu_dereference(rq->xdp_prog);
> +       if (prog)
> +               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> +       rcu_read_unlock();
> +
> +       switch (ret) {
> +       case XDP_PASS:
> +               return xsk_construct_skb(rq, xdp);
> +
> +       case XDP_TX:
> +       case XDP_REDIRECT:
> +               return NULL;
> +
> +       default:
> +               /* drop packet */
> +               xsk_buff_free(xdp);
> +               u64_stats_inc(&stats->drops);
> +               return NULL;
> +       }
> +}
> +
> +static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
> +                                   void *buf, u32 len,
> +                                   unsigned int *xdp_xmit,
> +                                   struct virtnet_rq_stats *stats)
> +{
> +       struct net_device *dev = vi->dev;
> +       struct sk_buff *skb = NULL;
> +       struct xdp_buff *xdp;
> +       u8 flags;
> +
> +       len -= vi->hdr_len;
> +
> +       u64_stats_add(&stats->bytes, len);
> +
> +       xdp = buf_to_xdp(vi, rq, buf, len);
> +       if (!xdp)
> +               return;
> +
> +       if (unlikely(len < ETH_HLEN)) {
> +               pr_debug("%s: short packet %i\n", dev->name, len);
> +               DEV_STATS_INC(dev, rx_length_errors);
> +               xsk_buff_free(xdp);
> +               return;
> +       }
> +
> +       flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
> +
> +       if (!vi->mergeable_rx_bufs)
> +               skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);

Since the mergeable support is only added in the next patch, I wonder if it
would be better to re-order the patches? For example, the xsk binding would
need to be moved to the last patch, otherwise we break xsk with mergeable
buffers here.

Or is there anything I missed here?

Thanks
Xuan Zhuo July 8, 2024, 7:42 a.m. UTC | #2
On Mon, 8 Jul 2024 15:00:50 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Jul 5, 2024 at 3:38 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > In the process:
> > 1. We may need to copy data to create skb for XDP_PASS.
> > 2. We may need to call xsk_buff_free() to release the buffer.
> > 3. The handle for xdp_buff is difference from the buffer.
> >
> > If we pushed this logic into existing receive handle(merge and small),
> > we would have to maintain code scattered inside merge and small (and big).
> > So I think it is a good choice for us to put the xsk code into an
> > independent function.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >
> > v7:
> >    1. rename xdp_construct_skb to xsk_construct_skb
> >    2. refactor virtnet_receive()
> >
> >  drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 168 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 2b27f5ada64a..64d8cd481890 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -498,6 +498,12 @@ struct virtio_net_common_hdr {
> >  };
> >
> >  static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > +static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
> > +                              struct net_device *dev,
> > +                              unsigned int *xdp_xmit,
> > +                              struct virtnet_rq_stats *stats);
> > +static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
> > +                                struct sk_buff *skb, u8 flags);
> >
> >  static bool is_xdp_frame(void *ptr)
> >  {
> > @@ -1062,6 +1068,124 @@ static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> >         sg->length = len;
> >  }
> >
> > +static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
> > +                                  struct receive_queue *rq, void *buf, u32 len)
> > +{
> > +       struct xdp_buff *xdp;
> > +       u32 bufsize;
> > +
> > +       xdp = (struct xdp_buff *)buf;
> > +
> > +       bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
> > +
> > +       if (unlikely(len > bufsize)) {
> > +               pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> > +                        vi->dev->name, len, bufsize);
> > +               DEV_STATS_INC(vi->dev, rx_length_errors);
> > +               xsk_buff_free(xdp);
> > +               return NULL;
> > +       }
> > +
> > +       xsk_buff_set_size(xdp, len);
> > +       xsk_buff_dma_sync_for_cpu(xdp);
> > +
> > +       return xdp;
> > +}
> > +
> > +static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
> > +                                        struct xdp_buff *xdp)
> > +{
> > +       unsigned int metasize = xdp->data - xdp->data_meta;
> > +       struct sk_buff *skb;
> > +       unsigned int size;
> > +
> > +       size = xdp->data_end - xdp->data_hard_start;
> > +       skb = napi_alloc_skb(&rq->napi, size);
> > +       if (unlikely(!skb)) {
> > +               xsk_buff_free(xdp);
> > +               return NULL;
> > +       }
> > +
> > +       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
> > +
> > +       size = xdp->data_end - xdp->data_meta;
> > +       memcpy(__skb_put(skb, size), xdp->data_meta, size);
> > +
> > +       if (metasize) {
> > +               __skb_pull(skb, metasize);
> > +               skb_metadata_set(skb, metasize);
> > +       }
> > +
> > +       xsk_buff_free(xdp);
> > +
> > +       return skb;
> > +}
> > +
> > +static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
> > +                                                struct receive_queue *rq, struct xdp_buff *xdp,
> > +                                                unsigned int *xdp_xmit,
> > +                                                struct virtnet_rq_stats *stats)
> > +{
> > +       struct bpf_prog *prog;
> > +       u32 ret;
> > +
> > +       ret = XDP_PASS;
> > +       rcu_read_lock();
> > +       prog = rcu_dereference(rq->xdp_prog);
> > +       if (prog)
> > +               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > +       rcu_read_unlock();
> > +
> > +       switch (ret) {
> > +       case XDP_PASS:
> > +               return xsk_construct_skb(rq, xdp);
> > +
> > +       case XDP_TX:
> > +       case XDP_REDIRECT:
> > +               return NULL;
> > +
> > +       default:
> > +               /* drop packet */
> > +               xsk_buff_free(xdp);
> > +               u64_stats_inc(&stats->drops);
> > +               return NULL;
> > +       }
> > +}
> > +
> > +static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
> > +                                   void *buf, u32 len,
> > +                                   unsigned int *xdp_xmit,
> > +                                   struct virtnet_rq_stats *stats)
> > +{
> > +       struct net_device *dev = vi->dev;
> > +       struct sk_buff *skb = NULL;
> > +       struct xdp_buff *xdp;
> > +       u8 flags;
> > +
> > +       len -= vi->hdr_len;
> > +
> > +       u64_stats_add(&stats->bytes, len);
> > +
> > +       xdp = buf_to_xdp(vi, rq, buf, len);
> > +       if (!xdp)
> > +               return;
> > +
> > +       if (unlikely(len < ETH_HLEN)) {
> > +               pr_debug("%s: short packet %i\n", dev->name, len);
> > +               DEV_STATS_INC(dev, rx_length_errors);
> > +               xsk_buff_free(xdp);
> > +               return;
> > +       }
> > +
> > +       flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
> > +
> > +       if (!vi->mergeable_rx_bufs)
> > +               skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
>
> I wonder if we add the mergeable support in the next patch would it be
> better to re-order the patch? For example, the xsk binding needs to be
> moved to the last patch, otherwise we break xsk with a mergeable
> buffer here?

If you are worried that users could exercise this path with only this commit
applied, there is no need to worry.

The NETDEV_XDP_ACT_XSK_ZEROCOPY flag is not advertised yet; I plan to add it
after the tx support is completed.

I did test this by adding the flag locally.

Thanks.
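
For reference, enabling the binding later is basically just advertising the
feature bit; a minimal sketch, following how the existing XDP feature bits
are set in virtnet_probe() (the exact placement is still to be decided):

	/* hypothetical, not part of this series: advertise AF_XDP zero copy
	 * only once both rx and tx support are in place */
	dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;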

>
> Or anything I missed here?
>
> Thanks
>
Jason Wang July 8, 2024, 8:08 a.m. UTC | #3
On Mon, Jul 8, 2024 at 3:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Mon, 8 Jul 2024 15:00:50 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Fri, Jul 5, 2024 at 3:38 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > In the process:
> > > 1. We may need to copy data to create skb for XDP_PASS.
> > > 2. We may need to call xsk_buff_free() to release the buffer.
> > > 3. The handle for xdp_buff is difference from the buffer.
> > >
> > > If we pushed this logic into existing receive handle(merge and small),
> > > we would have to maintain code scattered inside merge and small (and big).
> > > So I think it is a good choice for us to put the xsk code into an
> > > independent function.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >
> > > v7:
> > >    1. rename xdp_construct_skb to xsk_construct_skb
> > >    2. refactor virtnet_receive()
> > >
> > >  drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 168 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 2b27f5ada64a..64d8cd481890 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -498,6 +498,12 @@ struct virtio_net_common_hdr {
> > >  };
> > >
> > >  static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > +static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
> > > +                              struct net_device *dev,
> > > +                              unsigned int *xdp_xmit,
> > > +                              struct virtnet_rq_stats *stats);
> > > +static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
> > > +                                struct sk_buff *skb, u8 flags);
> > >
> > >  static bool is_xdp_frame(void *ptr)
> > >  {
> > > @@ -1062,6 +1068,124 @@ static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > >         sg->length = len;
> > >  }
> > >
> > > +static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
> > > +                                  struct receive_queue *rq, void *buf, u32 len)
> > > +{
> > > +       struct xdp_buff *xdp;
> > > +       u32 bufsize;
> > > +
> > > +       xdp = (struct xdp_buff *)buf;
> > > +
> > > +       bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
> > > +
> > > +       if (unlikely(len > bufsize)) {
> > > +               pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> > > +                        vi->dev->name, len, bufsize);
> > > +               DEV_STATS_INC(vi->dev, rx_length_errors);
> > > +               xsk_buff_free(xdp);
> > > +               return NULL;
> > > +       }
> > > +
> > > +       xsk_buff_set_size(xdp, len);
> > > +       xsk_buff_dma_sync_for_cpu(xdp);
> > > +
> > > +       return xdp;
> > > +}
> > > +
> > > +static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
> > > +                                        struct xdp_buff *xdp)
> > > +{
> > > +       unsigned int metasize = xdp->data - xdp->data_meta;
> > > +       struct sk_buff *skb;
> > > +       unsigned int size;
> > > +
> > > +       size = xdp->data_end - xdp->data_hard_start;
> > > +       skb = napi_alloc_skb(&rq->napi, size);
> > > +       if (unlikely(!skb)) {
> > > +               xsk_buff_free(xdp);
> > > +               return NULL;
> > > +       }
> > > +
> > > +       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
> > > +
> > > +       size = xdp->data_end - xdp->data_meta;
> > > +       memcpy(__skb_put(skb, size), xdp->data_meta, size);
> > > +
> > > +       if (metasize) {
> > > +               __skb_pull(skb, metasize);
> > > +               skb_metadata_set(skb, metasize);
> > > +       }
> > > +
> > > +       xsk_buff_free(xdp);
> > > +
> > > +       return skb;
> > > +}
> > > +
> > > +static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
> > > +                                                struct receive_queue *rq, struct xdp_buff *xdp,
> > > +                                                unsigned int *xdp_xmit,
> > > +                                                struct virtnet_rq_stats *stats)
> > > +{
> > > +       struct bpf_prog *prog;
> > > +       u32 ret;
> > > +
> > > +       ret = XDP_PASS;
> > > +       rcu_read_lock();
> > > +       prog = rcu_dereference(rq->xdp_prog);
> > > +       if (prog)
> > > +               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > > +       rcu_read_unlock();
> > > +
> > > +       switch (ret) {
> > > +       case XDP_PASS:
> > > +               return xsk_construct_skb(rq, xdp);
> > > +
> > > +       case XDP_TX:
> > > +       case XDP_REDIRECT:
> > > +               return NULL;
> > > +
> > > +       default:
> > > +               /* drop packet */
> > > +               xsk_buff_free(xdp);
> > > +               u64_stats_inc(&stats->drops);
> > > +               return NULL;
> > > +       }
> > > +}
> > > +
> > > +static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
> > > +                                   void *buf, u32 len,
> > > +                                   unsigned int *xdp_xmit,
> > > +                                   struct virtnet_rq_stats *stats)
> > > +{
> > > +       struct net_device *dev = vi->dev;
> > > +       struct sk_buff *skb = NULL;
> > > +       struct xdp_buff *xdp;
> > > +       u8 flags;
> > > +
> > > +       len -= vi->hdr_len;
> > > +
> > > +       u64_stats_add(&stats->bytes, len);
> > > +
> > > +       xdp = buf_to_xdp(vi, rq, buf, len);
> > > +       if (!xdp)
> > > +               return;
> > > +
> > > +       if (unlikely(len < ETH_HLEN)) {
> > > +               pr_debug("%s: short packet %i\n", dev->name, len);
> > > +               DEV_STATS_INC(dev, rx_length_errors);
> > > +               xsk_buff_free(xdp);
> > > +               return;
> > > +       }
> > > +
> > > +       flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
> > > +
> > > +       if (!vi->mergeable_rx_bufs)
> > > +               skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
> >
> > I wonder if we add the mergeable support in the next patch would it be
> > better to re-order the patch? For example, the xsk binding needs to be
> > moved to the last patch, otherwise we break xsk with a mergeable
> > buffer here?
>
> If you worry that the user works with this commit, I want to say you do not
> worry.
>
> Because the flags NETDEV_XDP_ACT_XSK_ZEROCOPY is not added. I plan to add that
> after the tx is completed.

Ok, this is something I missed; it would be better to mention it
somewhere (or it is already there and I missed it).

>
> I do test by adding this flags locally.
>
> Thanks.

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks

>
> >
> > Or anything I missed here?
> >
> > Thanks
> >
>
Xuan Zhuo July 8, 2024, 8:09 a.m. UTC | #4
On Mon, 8 Jul 2024 16:08:44 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Mon, Jul 8, 2024 at 3:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Mon, 8 Jul 2024 15:00:50 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Fri, Jul 5, 2024 at 3:38 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > In the process:
> > > > 1. We may need to copy data to create skb for XDP_PASS.
> > > > 2. We may need to call xsk_buff_free() to release the buffer.
> > > > 3. The handle for xdp_buff is difference from the buffer.
> > > >
> > > > If we pushed this logic into existing receive handle(merge and small),
> > > > we would have to maintain code scattered inside merge and small (and big).
> > > > So I think it is a good choice for us to put the xsk code into an
> > > > independent function.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >
> > > > v7:
> > > >    1. rename xdp_construct_skb to xsk_construct_skb
> > > >    2. refactor virtnet_receive()
> > > >
> > > >  drivers/net/virtio_net.c | 176 +++++++++++++++++++++++++++++++++++++--
> > > >  1 file changed, 168 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index 2b27f5ada64a..64d8cd481890 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -498,6 +498,12 @@ struct virtio_net_common_hdr {
> > > >  };
> > > >
> > > >  static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > +static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
> > > > +                              struct net_device *dev,
> > > > +                              unsigned int *xdp_xmit,
> > > > +                              struct virtnet_rq_stats *stats);
> > > > +static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
> > > > +                                struct sk_buff *skb, u8 flags);
> > > >
> > > >  static bool is_xdp_frame(void *ptr)
> > > >  {
> > > > @@ -1062,6 +1068,124 @@ static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> > > >         sg->length = len;
> > > >  }
> > > >
> > > > +static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
> > > > +                                  struct receive_queue *rq, void *buf, u32 len)
> > > > +{
> > > > +       struct xdp_buff *xdp;
> > > > +       u32 bufsize;
> > > > +
> > > > +       xdp = (struct xdp_buff *)buf;
> > > > +
> > > > +       bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
> > > > +
> > > > +       if (unlikely(len > bufsize)) {
> > > > +               pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> > > > +                        vi->dev->name, len, bufsize);
> > > > +               DEV_STATS_INC(vi->dev, rx_length_errors);
> > > > +               xsk_buff_free(xdp);
> > > > +               return NULL;
> > > > +       }
> > > > +
> > > > +       xsk_buff_set_size(xdp, len);
> > > > +       xsk_buff_dma_sync_for_cpu(xdp);
> > > > +
> > > > +       return xdp;
> > > > +}
> > > > +
> > > > +static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
> > > > +                                        struct xdp_buff *xdp)
> > > > +{
> > > > +       unsigned int metasize = xdp->data - xdp->data_meta;
> > > > +       struct sk_buff *skb;
> > > > +       unsigned int size;
> > > > +
> > > > +       size = xdp->data_end - xdp->data_hard_start;
> > > > +       skb = napi_alloc_skb(&rq->napi, size);
> > > > +       if (unlikely(!skb)) {
> > > > +               xsk_buff_free(xdp);
> > > > +               return NULL;
> > > > +       }
> > > > +
> > > > +       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
> > > > +
> > > > +       size = xdp->data_end - xdp->data_meta;
> > > > +       memcpy(__skb_put(skb, size), xdp->data_meta, size);
> > > > +
> > > > +       if (metasize) {
> > > > +               __skb_pull(skb, metasize);
> > > > +               skb_metadata_set(skb, metasize);
> > > > +       }
> > > > +
> > > > +       xsk_buff_free(xdp);
> > > > +
> > > > +       return skb;
> > > > +}
> > > > +
> > > > +static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
> > > > +                                                struct receive_queue *rq, struct xdp_buff *xdp,
> > > > +                                                unsigned int *xdp_xmit,
> > > > +                                                struct virtnet_rq_stats *stats)
> > > > +{
> > > > +       struct bpf_prog *prog;
> > > > +       u32 ret;
> > > > +
> > > > +       ret = XDP_PASS;
> > > > +       rcu_read_lock();
> > > > +       prog = rcu_dereference(rq->xdp_prog);
> > > > +       if (prog)
> > > > +               ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
> > > > +       rcu_read_unlock();
> > > > +
> > > > +       switch (ret) {
> > > > +       case XDP_PASS:
> > > > +               return xsk_construct_skb(rq, xdp);
> > > > +
> > > > +       case XDP_TX:
> > > > +       case XDP_REDIRECT:
> > > > +               return NULL;
> > > > +
> > > > +       default:
> > > > +               /* drop packet */
> > > > +               xsk_buff_free(xdp);
> > > > +               u64_stats_inc(&stats->drops);
> > > > +               return NULL;
> > > > +       }
> > > > +}
> > > > +
> > > > +static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
> > > > +                                   void *buf, u32 len,
> > > > +                                   unsigned int *xdp_xmit,
> > > > +                                   struct virtnet_rq_stats *stats)
> > > > +{
> > > > +       struct net_device *dev = vi->dev;
> > > > +       struct sk_buff *skb = NULL;
> > > > +       struct xdp_buff *xdp;
> > > > +       u8 flags;
> > > > +
> > > > +       len -= vi->hdr_len;
> > > > +
> > > > +       u64_stats_add(&stats->bytes, len);
> > > > +
> > > > +       xdp = buf_to_xdp(vi, rq, buf, len);
> > > > +       if (!xdp)
> > > > +               return;
> > > > +
> > > > +       if (unlikely(len < ETH_HLEN)) {
> > > > +               pr_debug("%s: short packet %i\n", dev->name, len);
> > > > +               DEV_STATS_INC(dev, rx_length_errors);
> > > > +               xsk_buff_free(xdp);
> > > > +               return;
> > > > +       }
> > > > +
> > > > +       flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
> > > > +
> > > > +       if (!vi->mergeable_rx_bufs)
> > > > +               skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
> > >
> > > I wonder if we add the mergeable support in the next patch would it be
> > > better to re-order the patch? For example, the xsk binding needs to be
> > > moved to the last patch, otherwise we break xsk with a mergeable
> > > buffer here?
> >
> > If you worry that the user works with this commit, I want to say you do not
> > worry.
> >
> > Because the flags NETDEV_XDP_ACT_XSK_ZEROCOPY is not added. I plan to add that
> > after the tx is completed.
>
> Ok, this is something I missed, it would be better to mention it
> somewhere (or it is already there but I miss it).


OK. I will add it to the cover letter of the next version.

Thanks.


>
> >
> > I do test by adding this flags locally.
> >
> > Thanks.
>
> Acked-by: Jason Wang <jasowang@redhat.com>
>
> Thanks
>
> >
> > >
> > > Or anything I missed here?
> > >
> > > Thanks
> > >
> >
>

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2b27f5ada64a..64d8cd481890 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -498,6 +498,12 @@  struct virtio_net_common_hdr {
 };

 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
+			       struct net_device *dev,
+			       unsigned int *xdp_xmit,
+			       struct virtnet_rq_stats *stats);
+static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq,
+				 struct sk_buff *skb, u8 flags);

 static bool is_xdp_frame(void *ptr)
 {
@@ -1062,6 +1068,124 @@  static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
 	sg->length = len;
 }

+static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
+				   struct receive_queue *rq, void *buf, u32 len)
+{
+	struct xdp_buff *xdp;
+	u32 bufsize;
+
+	xdp = (struct xdp_buff *)buf;
+
+	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
+
+	if (unlikely(len > bufsize)) {
+		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
+			 vi->dev->name, len, bufsize);
+		DEV_STATS_INC(vi->dev, rx_length_errors);
+		xsk_buff_free(xdp);
+		return NULL;
+	}
+
+	xsk_buff_set_size(xdp, len);
+	xsk_buff_dma_sync_for_cpu(xdp);
+
+	return xdp;
+}
+
+static struct sk_buff *xsk_construct_skb(struct receive_queue *rq,
+					 struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+	unsigned int size;
+
+	size = xdp->data_end - xdp->data_hard_start;
+	skb = napi_alloc_skb(&rq->napi, size);
+	if (unlikely(!skb)) {
+		xsk_buff_free(xdp);
+		return NULL;
+	}
+
+	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+
+	size = xdp->data_end - xdp->data_meta;
+	memcpy(__skb_put(skb, size), xdp->data_meta, size);
+
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
+	xsk_buff_free(xdp);
+
+	return skb;
+}
+
+static struct sk_buff *virtnet_receive_xsk_small(struct net_device *dev, struct virtnet_info *vi,
+						 struct receive_queue *rq, struct xdp_buff *xdp,
+						 unsigned int *xdp_xmit,
+						 struct virtnet_rq_stats *stats)
+{
+	struct bpf_prog *prog;
+	u32 ret;
+
+	ret = XDP_PASS;
+	rcu_read_lock();
+	prog = rcu_dereference(rq->xdp_prog);
+	if (prog)
+		ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats);
+	rcu_read_unlock();
+
+	switch (ret) {
+	case XDP_PASS:
+		return xsk_construct_skb(rq, xdp);
+
+	case XDP_TX:
+	case XDP_REDIRECT:
+		return NULL;
+
+	default:
+		/* drop packet */
+		xsk_buff_free(xdp);
+		u64_stats_inc(&stats->drops);
+		return NULL;
+	}
+}
+
+static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queue *rq,
+				    void *buf, u32 len,
+				    unsigned int *xdp_xmit,
+				    struct virtnet_rq_stats *stats)
+{
+	struct net_device *dev = vi->dev;
+	struct sk_buff *skb = NULL;
+	struct xdp_buff *xdp;
+	u8 flags;
+
+	len -= vi->hdr_len;
+
+	u64_stats_add(&stats->bytes, len);
+
+	xdp = buf_to_xdp(vi, rq, buf, len);
+	if (!xdp)
+		return;
+
+	if (unlikely(len < ETH_HLEN)) {
+		pr_debug("%s: short packet %i\n", dev->name, len);
+		DEV_STATS_INC(dev, rx_length_errors);
+		xsk_buff_free(xdp);
+		return;
+	}
+
+	flags = ((struct virtio_net_common_hdr *)(xdp->data - vi->hdr_len))->hdr.flags;
+
+	if (!vi->mergeable_rx_bufs)
+		skb = virtnet_receive_xsk_small(dev, vi, rq, xdp, xdp_xmit, stats);
+
+	if (skb)
+		virtnet_receive_done(vi, rq, skb, flags);
+}
+
 static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue *rq,
 				   struct xsk_buff_pool *pool, gfp_t gfp)
 {
@@ -2392,32 +2516,68 @@  static void refill_work(struct work_struct *work)
 	}
 }

-static int virtnet_receive(struct receive_queue *rq, int budget,
-			   unsigned int *xdp_xmit)
+static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
+				    struct receive_queue *rq,
+				    int budget,
+				    unsigned int *xdp_xmit,
+				    struct virtnet_rq_stats *stats)
+{
+	unsigned int len;
+	int packets = 0;
+	void *buf;
+
+	while (packets < budget) {
+		buf = virtqueue_get_buf(rq->vq, &len);
+		if (!buf)
+			break;
+
+		virtnet_receive_xsk_buf(vi, rq, buf, len, xdp_xmit, stats);
+		packets++;
+	}
+
+	return packets;
+}
+
+static int virtnet_receive_packets(struct virtnet_info *vi,
+				   struct receive_queue *rq,
+				   int budget,
+				   unsigned int *xdp_xmit,
+				   struct virtnet_rq_stats *stats)
 {
-	struct virtnet_info *vi = rq->vq->vdev->priv;
-	struct virtnet_rq_stats stats = {};
 	unsigned int len;
 	int packets = 0;
 	void *buf;
-	int i;

 	if (!vi->big_packets || vi->mergeable_rx_bufs) {
 		void *ctx;
-
 		while (packets < budget &&
 		       (buf = virtnet_rq_get_buf(rq, &len, &ctx))) {
-			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
+			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, stats);
 			packets++;
 		}
 	} else {
 		while (packets < budget &&
 		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
+			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
 			packets++;
 		}
 	}

+	return packets;
+}
+
+static int virtnet_receive(struct receive_queue *rq, int budget,
+			   unsigned int *xdp_xmit)
+{
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	struct virtnet_rq_stats stats = {};
+	int i, packets;
+
+	if (rq->xsk_pool)
+		packets = virtnet_receive_xsk_bufs(vi, rq, budget, xdp_xmit, &stats);
+	else
+		packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
+
 	if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
 		if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
 			spin_lock(&vi->refill_lock);