diff mbox series

[net-next,v5] virtio_net: Support RX hash XDP hint

Message ID 20240202121151.65710-1-liangchen.linux@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v5] virtio_net: Support RX hash XDP hint | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1048 this patch: 1048
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1065 this patch: 1065
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1065 this patch: 1065
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-02-04--21-00 (tests: 721)

Commit Message

Liang Chen Feb. 2, 2024, 12:11 p.m. UTC
The RSS hash report is a feature that's part of the virtio specification.
Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
(still a work in progress as per [1]) support this feature. While the
capability to obtain the RSS hash has been enabled in the normal path,
it's currently missing in the XDP path. Therefore, we are introducing
XDP hints through kfuncs to allow XDP programs to access the RSS hash.

[1] https://lore.kernel.org/all/20231015141644.260646-1-akihiko.odaki@daynix.com/#r

Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
  Changes from v4:
- cc complete list of maintainers
---
 drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 86 insertions(+), 12 deletions(-)

Comments

Jesper Dangaard Brouer Feb. 2, 2024, 4:19 p.m. UTC | #1
On 02/02/2024 13.11, Liang Chen wrote:
> The RSS hash report is a feature that's part of the virtio specification.
> Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> (still a work in progress as per [1]) support this feature. While the
> capability to obtain the RSS hash has been enabled in the normal path,
> it's currently missing in the XDP path. Therefore, we are introducing
> XDP hints through kfuncs to allow XDP programs to access the RSS hash.
> 
> 1.
> https://lore.kernel.org/all/20231015141644.260646-1-akihiko.odaki@daynix.com/#r
> 
> Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
> Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> Acked-by: Jason Wang <jasowang@redhat.com>
> ---
>    Changes from v4:
> - cc complete list of maintainers
> ---
>   drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++++-----
>   1 file changed, 86 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d7ce4a1011ea..7ce666c86ee0 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -349,6 +349,12 @@ struct virtio_net_common_hdr {
>   	};
>   };
>   
> +struct virtnet_xdp_buff {
> +	struct xdp_buff xdp;
> +	__le32 hash_value;
> +	__le16 hash_report;
> +};
> +
>   static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
>   
>   static bool is_xdp_frame(void *ptr)
> @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
>   	}
>   }
>   
> +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> +				     struct net_device *dev,
> +				     struct virtio_net_hdr_v1_hash *hdr_hash)
> +{
> +	if (dev->features & NETIF_F_RXHASH) {
> +		virtnet_xdp->hash_value = hdr_hash->hash_value;
> +		virtnet_xdp->hash_report = hdr_hash->hash_report;
> +	}
> +}
> +

Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
so that extracting the hash is deferred until (and only if) the XDP
bpf_prog calls the kfunc?



>   static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
>   			       struct net_device *dev,
>   			       unsigned int *xdp_xmit,
> @@ -1199,9 +1215,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
>   	unsigned int headroom = vi->hdr_len + header_offset;
>   	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
>   	struct page *page = virt_to_head_page(buf);
> +	struct virtnet_xdp_buff virtnet_xdp;
>   	struct page *xdp_page;
> +	struct xdp_buff *xdp;
>   	unsigned int buflen;
> -	struct xdp_buff xdp;
>   	struct sk_buff *skb;
>   	unsigned int metasize = 0;
>   	u32 act;
> @@ -1233,17 +1250,20 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
>   		page = xdp_page;
>   	}
>   
> -	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
> -	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
> +	xdp = &virtnet_xdp.xdp;
> +	xdp_init_buff(xdp, buflen, &rq->xdp_rxq);
> +	xdp_prepare_buff(xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
>   			 xdp_headroom, len, true);
>   
> -	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
> +	virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
> +
> +	act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
>   
>   	switch (act) {
>   	case XDP_PASS:
>   		/* Recalculate length in case bpf program changed it */
> -		len = xdp.data_end - xdp.data;
> -		metasize = xdp.data - xdp.data_meta;
> +		len = xdp->data_end - xdp->data;
> +		metasize = xdp->data - xdp->data_meta;
>   		break;
>   
>   	case XDP_TX:
> @@ -1254,7 +1274,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
>   		goto err_xdp;
>   	}
>   
> -	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
> +	skb = virtnet_build_skb(buf, buflen, xdp->data - buf, len);
>   	if (unlikely(!skb))
>   		goto err;
>   
> @@ -1591,10 +1611,11 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
>   	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
>   	struct page *page = virt_to_head_page(buf);
>   	int offset = buf - page_address(page);
> +	struct virtnet_xdp_buff virtnet_xdp;
>   	unsigned int xdp_frags_truesz = 0;
>   	struct sk_buff *head_skb;
>   	unsigned int frame_sz;
> -	struct xdp_buff xdp;
> +	struct xdp_buff *xdp;
>   	void *data;
>   	u32 act;
>   	int err;
> @@ -1604,16 +1625,19 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
>   	if (unlikely(!data))
>   		goto err_xdp;
>   
> -	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
> +	xdp = &virtnet_xdp.xdp;
> +	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp, data, len, frame_sz,
>   					 &num_buf, &xdp_frags_truesz, stats);
>   	if (unlikely(err))
>   		goto err_xdp;
>   
> -	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
> +	virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
> +
> +	act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
>   
>   	switch (act) {
>   	case XDP_PASS:
> -		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
> +		head_skb = build_skb_from_xdp_buff(dev, vi, xdp, xdp_frags_truesz);
>   		if (unlikely(!head_skb))
>   			break;
>   		return head_skb;
> @@ -1626,7 +1650,7 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
>   		break;
>   	}
>   
> -	put_xdp_frags(&xdp);
> +	put_xdp_frags(xdp);
>   
>   err_xdp:
>   	put_page(page);
> @@ -4579,6 +4603,55 @@ static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
>   	}
>   }
>   
> +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> +			       enum xdp_rss_hash_type *rss_type)
> +{
> +	const struct virtnet_xdp_buff *virtnet_xdp = (void *)_ctx;
> +
> +	if (!(virtnet_xdp->xdp.rxq->dev->features & NETIF_F_RXHASH))
> +		return -ENODATA;
> +
> +	switch (__le16_to_cpu(virtnet_xdp->hash_report)) {
> +	case VIRTIO_NET_HASH_REPORT_TCPv4:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_UDPv4:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_TCPv6:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_UDPv6:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> +		*rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_IPv4:
> +		*rss_type = XDP_RSS_TYPE_L3_IPV4;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_IPv6:
> +		*rss_type = XDP_RSS_TYPE_L3_IPV6;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> +		*rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
> +		break;
> +	case VIRTIO_NET_HASH_REPORT_NONE:
> +	default:
> +		*rss_type = XDP_RSS_TYPE_NONE;
> +	}
> +
> +	*hash = __le32_to_cpu(virtnet_xdp->hash_value);
> +	return 0;
> +}
> +
> +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> +	.xmo_rx_hash			= virtnet_xdp_rx_hash,
> +};
> +
>   static int virtnet_probe(struct virtio_device *vdev)
>   {
>   	int i, err = -ENOMEM;
> @@ -4704,6 +4777,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>   				  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
>   
>   		dev->hw_features |= NETIF_F_RXHASH;
> +		dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
>   	}
>   
>   	if (vi->has_rss_hash_report)
Liang Chen Feb. 3, 2024, 2:56 a.m. UTC | #2
On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
>
>
>
> On 02/02/2024 13.11, Liang Chen wrote:
> > The RSS hash report is a feature that's part of the virtio specification.
> > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> > (still a work in progress as per [1]) support this feature. While the
> > capability to obtain the RSS hash has been enabled in the normal path,
> > it's currently missing in the XDP path. Therefore, we are introducing
> > XDP hints through kfuncs to allow XDP programs to access the RSS hash.
> >
> > 1.
> > https://lore.kernel.org/all/20231015141644.260646-1-akihiko.odaki@daynix.com/#r
> >
> > Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
> > Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > Acked-by: Jason Wang <jasowang@redhat.com>
> > ---
> >    Changes from v4:
> > - cc complete list of maintainers
> > ---
> >   drivers/net/virtio_net.c | 98 +++++++++++++++++++++++++++++++++++-----
> >   1 file changed, 86 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index d7ce4a1011ea..7ce666c86ee0 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -349,6 +349,12 @@ struct virtio_net_common_hdr {
> >       };
> >   };
> >
> > +struct virtnet_xdp_buff {
> > +     struct xdp_buff xdp;
> > +     __le32 hash_value;
> > +     __le16 hash_report;
> > +};
> > +
> >   static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> >
> >   static bool is_xdp_frame(void *ptr)
> > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> >       }
> >   }
> >
> > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > +                                  struct net_device *dev,
> > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > +{
> > +     if (dev->features & NETIF_F_RXHASH) {
> > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > +     }
> > +}
> > +
>
> Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> with the purpose of delaying extracting this, until and only if XDP
> bpf_prog calls the kfunc?
>

That seems to be the way v1 works:
https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
However, it was pointed out that the inline header may be overwritten by
the XDP program, so the hash is copied out to preserve its integrity.


Thanks,
Liang

>
>
> >   static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
> >                              struct net_device *dev,
> >                              unsigned int *xdp_xmit,
> > @@ -1199,9 +1215,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> >       unsigned int headroom = vi->hdr_len + header_offset;
> >       struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
> >       struct page *page = virt_to_head_page(buf);
> > +     struct virtnet_xdp_buff virtnet_xdp;
> >       struct page *xdp_page;
> > +     struct xdp_buff *xdp;
> >       unsigned int buflen;
> > -     struct xdp_buff xdp;
> >       struct sk_buff *skb;
> >       unsigned int metasize = 0;
> >       u32 act;
> > @@ -1233,17 +1250,20 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> >               page = xdp_page;
> >       }
> >
> > -     xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
> > -     xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
> > +     xdp = &virtnet_xdp.xdp;
> > +     xdp_init_buff(xdp, buflen, &rq->xdp_rxq);
> > +     xdp_prepare_buff(xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
> >                        xdp_headroom, len, true);
> >
> > -     act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
> > +     virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
> > +
> > +     act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
> >
> >       switch (act) {
> >       case XDP_PASS:
> >               /* Recalculate length in case bpf program changed it */
> > -             len = xdp.data_end - xdp.data;
> > -             metasize = xdp.data - xdp.data_meta;
> > +             len = xdp->data_end - xdp->data;
> > +             metasize = xdp->data - xdp->data_meta;
> >               break;
> >
> >       case XDP_TX:
> > @@ -1254,7 +1274,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> >               goto err_xdp;
> >       }
> >
> > -     skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
> > +     skb = virtnet_build_skb(buf, buflen, xdp->data - buf, len);
> >       if (unlikely(!skb))
> >               goto err;
> >
> > @@ -1591,10 +1611,11 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> >       int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> >       struct page *page = virt_to_head_page(buf);
> >       int offset = buf - page_address(page);
> > +     struct virtnet_xdp_buff virtnet_xdp;
> >       unsigned int xdp_frags_truesz = 0;
> >       struct sk_buff *head_skb;
> >       unsigned int frame_sz;
> > -     struct xdp_buff xdp;
> > +     struct xdp_buff *xdp;
> >       void *data;
> >       u32 act;
> >       int err;
> > @@ -1604,16 +1625,19 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> >       if (unlikely(!data))
> >               goto err_xdp;
> >
> > -     err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
> > +     xdp = &virtnet_xdp.xdp;
> > +     err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp, data, len, frame_sz,
> >                                        &num_buf, &xdp_frags_truesz, stats);
> >       if (unlikely(err))
> >               goto err_xdp;
> >
> > -     act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
> > +     virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
> > +
> > +     act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
> >
> >       switch (act) {
> >       case XDP_PASS:
> > -             head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
> > +             head_skb = build_skb_from_xdp_buff(dev, vi, xdp, xdp_frags_truesz);
> >               if (unlikely(!head_skb))
> >                       break;
> >               return head_skb;
> > @@ -1626,7 +1650,7 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> >               break;
> >       }
> >
> > -     put_xdp_frags(&xdp);
> > +     put_xdp_frags(xdp);
> >
> >   err_xdp:
> >       put_page(page);
> > @@ -4579,6 +4603,55 @@ static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
> >       }
> >   }
> >
> > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> > +                            enum xdp_rss_hash_type *rss_type)
> > +{
> > +     const struct virtnet_xdp_buff *virtnet_xdp = (void *)_ctx;
> > +
> > +     if (!(virtnet_xdp->xdp.rxq->dev->features & NETIF_F_RXHASH))
> > +             return -ENODATA;
> > +
> > +     switch (__le16_to_cpu(virtnet_xdp->hash_report)) {
> > +     case VIRTIO_NET_HASH_REPORT_TCPv4:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_UDPv4:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_TCPv6:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_UDPv6:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > +             *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_IPv4:
> > +             *rss_type = XDP_RSS_TYPE_L3_IPV4;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_IPv6:
> > +             *rss_type = XDP_RSS_TYPE_L3_IPV6;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > +             *rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
> > +             break;
> > +     case VIRTIO_NET_HASH_REPORT_NONE:
> > +     default:
> > +             *rss_type = XDP_RSS_TYPE_NONE;
> > +     }
> > +
> > +     *hash = __le32_to_cpu(virtnet_xdp->hash_value);
> > +     return 0;
> > +}
> > +
> > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> > +     .xmo_rx_hash                    = virtnet_xdp_rx_hash,
> > +};
> > +
> >   static int virtnet_probe(struct virtio_device *vdev)
> >   {
> >       int i, err = -ENOMEM;
> > @@ -4704,6 +4777,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >                                 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
> >
> >               dev->hw_features |= NETIF_F_RXHASH;
> > +             dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
> >       }
> >
> >       if (vi->has_rss_hash_report)
Paolo Abeni Feb. 6, 2024, 10:43 a.m. UTC | #3
On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > On 02/02/2024 13.11, Liang Chen wrote:
[...]
> > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > >       }
> > >   }
> > > 
> > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > +                                  struct net_device *dev,
> > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > +{
> > > +     if (dev->features & NETIF_F_RXHASH) {
> > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > +     }
> > > +}
> > > +
> > 
> > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > with the purpose of delaying extracting this, until and only if XDP
> > bpf_prog calls the kfunc?
> > 
> 
> That seems to be the way v1 works,
> https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> . But it was pointed out that the inline header may be overwritten by
> the xdp prog, so the hash is copied out to maintain its integrity.

Why? Isn't XDP supposed to get write access only to the pkt
contents/buffer?

If the XDP program can really change the virtio_net_hdr, that looks
potentially dangerous/bug-prone regardless of this patch.

Thanks,

Paolo
Liang Chen Feb. 7, 2024, 2:54 a.m. UTC | #4
On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > On 02/02/2024 13.11, Liang Chen wrote:
> [...]
> > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > >       }
> > > >   }
> > > >
> > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > +                                  struct net_device *dev,
> > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > +{
> > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > +     }
> > > > +}
> > > > +
> > >
> > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > with the purpose of delaying extracting this, until and only if XDP
> > > bpf_prog calls the kfunc?
> > >
> >
> > That seems to be the way v1 works,
> > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > . But it was pointed out that the inline header may be overwritten by
> > the xdp prog, so the hash is copied out to maintain its integrity.
>
> Why? isn't XDP supposed to get write access only to the pkt
> contents/buffer?
>

Normally, an XDP program accesses only the packet data. However,
there's also an XDP RX Metadata area, referenced by the data_meta
pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
point somewhere ahead of the data buffer, thereby granting the XDP
program access to the virtio header located immediately before the
packet data.

Thanks,
Liang

> if the XDP program can really change the virtio_net_hdr, that looks
> potentially dangerous/bug prone regardless of this patch.
>
> Thanks,
>
> Paolo
>
Paolo Abeni Feb. 7, 2024, 2:27 p.m. UTC | #5
On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > 
> > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > On 02/02/2024 13.11, Liang Chen wrote:
> > [...]
> > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > >       }
> > > > >   }
> > > > > 
> > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > +                                  struct net_device *dev,
> > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > +{
> > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > +     }
> > > > > +}
> > > > > +
> > > > 
> > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > with the purpose of delaying extracting this, until and only if XDP
> > > > bpf_prog calls the kfunc?
> > > > 
> > > 
> > > That seems to be the way v1 works,
> > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > . But it was pointed out that the inline header may be overwritten by
> > > the xdp prog, so the hash is copied out to maintain its integrity.
> > 
> > Why? isn't XDP supposed to get write access only to the pkt
> > contents/buffer?
> > 
> 
> Normally, an XDP program accesses only the packet data. However,
> there's also an XDP RX Metadata area, referenced by the data_meta
> pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> point somewhere ahead of the data buffer, thereby granting the XDP
> program access to the virtio header located immediately before the

AFAICS bpf_xdp_adjust_meta() does not allow moving data_meta before
xdp->data_hard_start:

https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210

and virtio_net sets that field after the virtio_net_hdr:

https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420

I don't see how the virtio hdr could be touched. Possibly even more
important: if such a thing is possible, I think it should be
denied somehow (for the same reason a H/W NIC should prevent XDP from
modifying its own buffer descriptor).

Cheers,

Paolo
Liang Chen Feb. 9, 2024, 10:39 a.m. UTC | #6
On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > >
> > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > [...]
> > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > >       }
> > > > > >   }
> > > > > >
> > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > +                                  struct net_device *dev,
> > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > +{
> > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > +     }
> > > > > > +}
> > > > > > +
> > > > >
> > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > bpf_prog calls the kfunc?
> > > > >
> > > >
> > > > That seems to be the way v1 works,
> > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > . But it was pointed out that the inline header may be overwritten by
> > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > >
> > > Why? isn't XDP supposed to get write access only to the pkt
> > > contents/buffer?
> > >
> >
> > Normally, an XDP program accesses only the packet data. However,
> > there's also an XDP RX Metadata area, referenced by the data_meta
> > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > point somewhere ahead of the data buffer, thereby granting the XDP
> > program access to the virtio header located immediately before the
>
> AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> xdp->data_hard_start:
>
> https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
>
> and virtio net set such field after the virtio_net_hdr:
>
> https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
>
> I don't see how the virtio hdr could be touched? Possibly even more
> important: if such thing is possible, I think is should be somewhat
> denied (for the same reason an H/W nic should prevent XDP from
> modifying its own buffer descriptor).

Thank you for highlighting this concern. The header layout differs
slightly between small and mergeable mode. Taking 'mergeable mode' as
an example, after calling xdp_prepare_buff the layout of xdp_buff
would be as depicted in the diagram below,

                      buf
                       |
                       v
        +--------------+--------------+-------------+
        | xdp headroom | virtio header| packet      |
        | (256 bytes)  | (20 bytes)   | content     |
        +--------------+--------------+-------------+
        ^                             ^
        |                             |
 data_hard_start                    data
                                  data_meta

If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
towards 'data_hard_start', it would point to the inline header, thus
potentially allowing the XDP program to access the inline header.

We will take a closer look at how to prevent the inline header from
being altered, possibly by borrowing some ideas from other
xdp_metadata_ops implementations.


Thanks,
Liang

>
> Cheers,
>
> Paolo
>
Paolo Abeni Feb. 9, 2024, 12:57 p.m. UTC | #7
On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > 
> > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > 
> > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > [...]
> > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > >       }
> > > > > > >   }
> > > > > > > 
> > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > +                                  struct net_device *dev,
> > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > +{
> > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > +     }
> > > > > > > +}
> > > > > > > +
> > > > > > 
> > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > bpf_prog calls the kfunc?
> > > > > > 
> > > > > 
> > > > > That seems to be the way v1 works,
> > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > 
> > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > contents/buffer?
> > > > 
> > > 
> > > Normally, an XDP program accesses only the packet data. However,
> > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > program access to the virtio header located immediately before the
> > 
> > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > xdp->data_hard_start:
> > 
> > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > 
> > and virtio net set such field after the virtio_net_hdr:
> > 
> > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > 
> > I don't see how the virtio hdr could be touched? Possibly even more
> > important: if such thing is possible, I think is should be somewhat
> > denied (for the same reason an H/W nic should prevent XDP from
> > modifying its own buffer descriptor).
> 
> Thank you for highlighting this concern. The header layout differs
> slightly between small and mergeable mode. Taking 'mergeable mode' as
> an example, after calling xdp_prepare_buff the layout of xdp_buff
> would be as depicted in the diagram below,
> 
>                       buf
>                        |
>                        v
>         +--------------+--------------+-------------+
>         | xdp headroom | virtio header| packet      |
>         | (256 bytes)  | (20 bytes)   | content     |
>         +--------------+--------------+-------------+
>         ^                             ^
>         |                             |
>  data_hard_start                    data
>                                   data_meta
> 
> If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> towards 'data_hard_start', it would point to the inline header, thus
> potentially allowing the XDP program to access the inline header.

I see. That layout was completely unexpected to me.

AFAICS the virtio_net driver tries to avoid accessing/using the
virtio_net_hdr after the XDP program execution, so nothing tragic
should happen.

@Michael, @Jason, I guess the above is like that by design? Isn't it a
bit fragile?

Thanks!

Paolo
Michael S. Tsirkin Feb. 22, 2024, 8:09 p.m. UTC | #8
On Fri, Feb 09, 2024 at 01:57:25PM +0100, Paolo Abeni wrote:
> On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > 
> > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > 
> > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > [...]
> > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > >       }
> > > > > > > >   }
> > > > > > > > 
> > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > +                                  struct net_device *dev,
> > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > +{
> > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > +     }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > 
> > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > bpf_prog calls the kfunc?
> > > > > > > 
> > > > > > 
> > > > > > That seems to be the way v1 works,
> > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > 
> > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > contents/buffer?
> > > > > 
> > > > 
> > > > Normally, an XDP program accesses only the packet data. However,
> > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > program access to the virtio header located immediately before the
> > > 
> > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > xdp->data_hard_start:
> > > 
> > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > 
> > > and virtio net set such field after the virtio_net_hdr:
> > > 
> > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > 
> > > I don't see how the virtio hdr could be touched? Possibly even more
> > > important: if such thing is possible, I think is should be somewhat
> > > denied (for the same reason an H/W nic should prevent XDP from
> > > modifying its own buffer descriptor).
> > 
> > Thank you for highlighting this concern. The header layout differs
> > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > would be as depicted in the diagram below,
> > 
> >                       buf
> >                        |
> >                        v
> >         +--------------+--------------+-------------+
> >         | xdp headroom | virtio header| packet      |
> >         | (256 bytes)  | (20 bytes)   | content     |
> >         +--------------+--------------+-------------+
> >         ^                             ^
> >         |                             |
> >  data_hard_start                    data
> >                                   data_meta
> > 
> > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > towards 'data_hard_start', it would point to the inline header, thus
> > potentially allowing the XDP program to access the inline header.
> 
> I see. That layout was completely unexpected to me.
> 
> AFAICS the virtio_net driver tries to avoid accessing/using the
> virtio_net_hdr after the XDP program execution, so nothing tragic
> should happen.
> 
> @Michael, @Jason, I guess the above is like that by design? Isn't it a
> bit fragile?
> 
> Thanks!
> 
> Paolo

I agree it is all a bit fragile, not sure how to do better without extra
copies though ...
Xuan Zhuo Feb. 23, 2024, 1:37 a.m. UTC | #9
On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > >
> > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > >
> > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > [...]
> > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > >       }
> > > > > > > >   }
> > > > > > > >
> > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > +                                  struct net_device *dev,
> > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > +{
> > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > +     }
> > > > > > > > +}
> > > > > > > > +
> > > > > > >
> > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > bpf_prog calls the kfunc?
> > > > > > >
> > > > > >
> > > > > > That seems to be the way v1 works,
> > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > >
> > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > contents/buffer?
> > > > >
> > > >
> > > > Normally, an XDP program accesses only the packet data. However,
> > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > program access to the virtio header located immediately before the
> > >
> > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > xdp->data_hard_start:
> > >
> > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > >
> > > and virtio net set such field after the virtio_net_hdr:
> > >
> > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > >
> > > I don't see how the virtio hdr could be touched? Possibly even more
> > > important: if such thing is possible, I think is should be somewhat
> > > denied (for the same reason an H/W nic should prevent XDP from
> > > modifying its own buffer descriptor).
> >
> > Thank you for highlighting this concern. The header layout differs
> > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > would be as depicted in the diagram below,
> >
> >                       buf
> >                        |
> >                        v
> >         +--------------+--------------+-------------+
> >         | xdp headroom | virtio header| packet      |
> >         | (256 bytes)  | (20 bytes)   | content     |
> >         +--------------+--------------+-------------+
> >         ^                             ^
> >         |                             |
> >  data_hard_start                    data
> >                                   data_meta
> >
> > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > towards 'data_hard_start', it would point to the inline header, thus
> > potentially allowing the XDP program to access the inline header.
>
> I see. That layout was completely unexpected to me.
>
> AFAICS the virtio_net driver tries to avoid accessing/using the
> virtio_net_hdr after the XDP program execution, so nothing tragic
> should happen.
>
> @Michael, @Jason, I guess the above is like that by design? Isn't it a
> bit fragile?

Yes. We handle it carefully. That brings some trouble; we hope to put the
virtio-net header into the vring desc like other NICs, but that is a big project.

I think this patch is OK; it can be merged to net-next first.

Thanks.


>
> Thanks!
>
> Paolo
>
Jason Wang Feb. 26, 2024, 4:59 a.m. UTC | #10
On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > >
> > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > >
> > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > [...]
> > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > >       }
> > > > > > > > >   }
> > > > > > > > >
> > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > +{
> > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > +     }
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > >
> > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > bpf_prog calls the kfunc?
> > > > > > > >
> > > > > > >
> > > > > > > That seems to be the way v1 works,
> > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > >
> > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > contents/buffer?
> > > > > >
> > > > >
> > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > program access to the virtio header located immediately before the
> > > >
> > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > xdp->data_hard_start:
> > > >
> > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > >
> > > > and virtio net set such field after the virtio_net_hdr:
> > > >
> > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > >
> > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > important: if such thing is possible, I think is should be somewhat
> > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > modifying its own buffer descriptor).
> > >
> > > Thank you for highlighting this concern. The header layout differs
> > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > would be as depicted in the diagram below,
> > >
> > >                       buf
> > >                        |
> > >                        v
> > >         +--------------+--------------+-------------+
> > >         | xdp headroom | virtio header| packet      |
> > >         | (256 bytes)  | (20 bytes)   | content     |
> > >         +--------------+--------------+-------------+
> > >         ^                             ^
> > >         |                             |
> > >  data_hard_start                    data
> > >                                   data_meta
> > >
> > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > towards 'data_hard_start', it would point to the inline header, thus
> > > potentially allowing the XDP program to access the inline header.
> >
> > I see. That layout was completely unexpected to me.
> >
> > AFAICS the virtio_net driver tries to avoid accessing/using the
> > virtio_net_hdr after the XDP program execution, so nothing tragic
> > should happen.
> >
> > @Michael, @Jason, I guess the above is like that by design? Isn't it a
> > bit fragile?

Yes.

>
> YES. We process it carefully. That brings some troubles, we hope to put the
> virtio-net header to the vring desc like other NICs. But that is a big project.

Yes, and we still need to support the "legacy" layout.

>
> I think this patch is ok, this can be merged to net-next firstly.

+1

Thanks

>
> Thanks.
>
>
> >
> > Thanks!
> >
> > Paolo
> >
>
John Fastabend Feb. 26, 2024, 8:42 p.m. UTC | #11
Jason Wang wrote:
> On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > >
> > > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > >
> > > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > > [...]
> > > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > > >       }
> > > > > > > > > >   }
> > > > > > > > > >
> > > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > > +{
> > > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > > +     }
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > >
> > > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > > bpf_prog calls the kfunc?
> > > > > > > > >
> > > > > > > >
> > > > > > > > That seems to be the way v1 works,
> > > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > > >
> > > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > > contents/buffer?
> > > > > > >
> > > > > >
> > > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > > program access to the virtio header located immediately before the
> > > > >
> > > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > > xdp->data_hard_start:
> > > > >
> > > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > > >
> > > > > and virtio net set such field after the virtio_net_hdr:
> > > > >
> > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > > >
> > > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > > important: if such thing is possible, I think is should be somewhat
> > > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > > modifying its own buffer descriptor).
> > > >
> > > > Thank you for highlighting this concern. The header layout differs
> > > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > > would be as depicted in the diagram below,
> > > >
> > > >                       buf
> > > >                        |
> > > >                        v
> > > >         +--------------+--------------+-------------+
> > > >         | xdp headroom | virtio header| packet      |
> > > >         | (256 bytes)  | (20 bytes)   | content     |
> > > >         +--------------+--------------+-------------+
> > > >         ^                             ^
> > > >         |                             |
> > > >  data_hard_start                    data
> > > >                                   data_meta
> > > >
> > > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > > towards 'data_hard_start', it would point to the inline header, thus
> > > > potentially allowing the XDP program to access the inline header.

Fairly late to the thread, sorry. Given the above layout, does it make sense to
just delay extraction to the kfunc as suggested above? Sure, the XDP program
could smash the entry in the virtio header, but this is already the case for
anything else there. A program writing over the virtio header is likely
buggy anyway. The worst that might happen is bad RSS values and mappings?

I like seeing more use cases for the hints though.

Thanks!
John
Liang Chen Feb. 29, 2024, 8:37 a.m. UTC | #12
On Tue, Feb 27, 2024 at 4:42 AM John Fastabend <john.fastabend@gmail.com> wrote:
>
> Jason Wang wrote:
> > On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > > > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > >
> > > > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > > > [...]
> > > > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > > > >       }
> > > > > > > > > > >   }
> > > > > > > > > > >
> > > > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > > > +{
> > > > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > > > +     }
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > >
> > > > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > > > bpf_prog calls the kfunc?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > That seems to be the way v1 works,
> > > > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > > > >
> > > > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > > > contents/buffer?
> > > > > > > >
> > > > > > >
> > > > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > > > program access to the virtio header located immediately before the
> > > > > >
> > > > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > > > xdp->data_hard_start:
> > > > > >
> > > > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > > > >
> > > > > > and virtio net set such field after the virtio_net_hdr:
> > > > > >
> > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > > > >
> > > > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > > > important: if such thing is possible, I think is should be somewhat
> > > > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > > > modifying its own buffer descriptor).
> > > > >
> > > > > Thank you for highlighting this concern. The header layout differs
> > > > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > > > would be as depicted in the diagram below,
> > > > >
> > > > >                       buf
> > > > >                        |
> > > > >                        v
> > > > >         +--------------+--------------+-------------+
> > > > >         | xdp headroom | virtio header| packet      |
> > > > >         | (256 bytes)  | (20 bytes)   | content     |
> > > > >         +--------------+--------------+-------------+
> > > > >         ^                             ^
> > > > >         |                             |
> > > > >  data_hard_start                    data
> > > > >                                   data_meta
> > > > >
> > > > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > > > towards 'data_hard_start', it would point to the inline header, thus
> > > > > potentially allowing the XDP program to access the inline header.
>
> Fairly late to the thread sorry. Given above layout does it make sense to
> just delay extraction to the kfunc as suggested above? Sure the XDP program
> could smash the entry in virtio header, but this is already the case for
> anything else there. A program writing over the virtio header is likely
> buggy anyways. Worse that might happen is bad rss values and mappings?

Thank you for raising the concern. I am not quite sure if the XDP
program is considered buggy, as it is agnostic to the layout of the
inline header.
Let's say an XDP program calls bpf_xdp_adjust_meta to adjust data_meta
to point to the inline header and overwrites it without even knowing
of its existence. Later, when the XDP program invokes the kfunc to
retrieve the hash, incorrect data would be returned. In this case, the
XDP program seems to be doing everything legally but ends up with the
wrong hash data.

Thanks,
Liang

>
> I like seeing more use cases for the hints though.
>
> Thanks!
> John
Liang Chen April 1, 2024, 3:38 a.m. UTC | #13
On Thu, Feb 29, 2024 at 4:37 PM Liang Chen <liangchen.linux@gmail.com> wrote:
>
> On Tue, Feb 27, 2024 at 4:42 AM John Fastabend <john.fastabend@gmail.com> wrote:
> >
> > Jason Wang wrote:
> > > On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > > > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > >
> > > > > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > > > > [...]
> > > > > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > > > > >       }
> > > > > > > > > > > >   }
> > > > > > > > > > > >
> > > > > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > > > > +     }
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > >
> > > > > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > > > > bpf_prog calls the kfunc?
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > That seems to be the way v1 works,
> > > > > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > > > > >
> > > > > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > > > > contents/buffer?
> > > > > > > > >
> > > > > > > >
> > > > > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > > > > program access to the virtio header located immediately before the
> > > > > > >
> > > > > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > > > > xdp->data_hard_start:
> > > > > > >
> > > > > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > > > > >
> > > > > > > and virtio net set such field after the virtio_net_hdr:
> > > > > > >
> > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > > > > >
> > > > > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > > > > important: if such thing is possible, I think is should be somewhat
> > > > > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > > > > modifying its own buffer descriptor).
> > > > > >
> > > > > > Thank you for highlighting this concern. The header layout differs
> > > > > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > > > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > > > > would be as depicted in the diagram below,
> > > > > >
> > > > > >                       buf
> > > > > >                        |
> > > > > >                        v
> > > > > >         +--------------+--------------+-------------+
> > > > > >         | xdp headroom | virtio header| packet      |
> > > > > >         | (256 bytes)  | (20 bytes)   | content     |
> > > > > >         +--------------+--------------+-------------+
> > > > > >         ^                             ^
> > > > > >         |                             |
> > > > > >  data_hard_start                    data
> > > > > >                                   data_meta
> > > > > >
> > > > > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > > > > towards 'data_hard_start', it would point to the inline header, thus
> > > > > > potentially allowing the XDP program to access the inline header.
> >
> > Fairly late to the thread sorry. Given above layout does it make sense to
> > just delay extraction to the kfunc as suggested above? Sure the XDP program
> > could smash the entry in virtio header, but this is already the case for
> > anything else there. A program writing over the virtio header is likely
> > buggy anyways. Worse that might happen is bad rss values and mappings?
>
> Thank you for raising the concern. I am not quite sure if the XDP
> program is considered buggy, as it is agnostic to the layout of the
> inline header.
> Let's say an XDP program calls bpf_xdp_adjust_meta to adjust data_meta
> to point to the inline header and overwrites it without even knowing
> of its existence. Later, when the XDP program invokes the kfunc to
> retrieve the hash, incorrect data would be returned. In this case, the
> XDP program seems to be doing everything legally but ends up with the
> wrong hash data.
>
> Thanks,
> Liang
>

I haven’t received any feedback yet, so I’m under the impression that
the XDP program is still considered buggy in the scenario mentioned
above, and the overall behavior is as designed from the XDP perspective.
Looking at the Intel igc driver, it also does not bother with this
particular aspect.

Given this context, we don't need to be concerned about the hash value
being overwritten. So if there aren't any objections, I plan to remove
the preservation of the hash value in the next iteration.

Thanks,
Liang

> >
> > I like seeing more use cases for the hints though.
> >
> > Thanks!
> > John
Jason Wang April 8, 2024, 6:41 a.m. UTC | #14
On Mon, Apr 1, 2024 at 11:38 AM Liang Chen <liangchen.linux@gmail.com> wrote:
>
> On Thu, Feb 29, 2024 at 4:37 PM Liang Chen <liangchen.linux@gmail.com> wrote:
> >
> > On Tue, Feb 27, 2024 at 4:42 AM John Fastabend <john.fastabend@gmail.com> wrote:
> > >
> > > Jason Wang wrote:
> > > > On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > >
> > > > > On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > > > > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > > > > > [...]
> > > > > > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > > > > > >       }
> > > > > > > > > > > > >   }
> > > > > > > > > > > > >
> > > > > > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > > > > > +     }
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +
> > > > > > > > > > > >
> > > > > > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > > > > > bpf_prog calls the kfunc?
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > That seems to be the way v1 works,
> > > > > > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > > > > > >
> > > > > > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > > > > > contents/buffer?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > > > > > program access to the virtio header located immediately before the
> > > > > > > >
> > > > > > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > > > > > xdp->data_hard_start:
> > > > > > > >
> > > > > > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > > > > > >
> > > > > > > > and virtio net set such field after the virtio_net_hdr:
> > > > > > > >
> > > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > > > > > >
> > > > > > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > > > > > > important: if such a thing is possible, I think it should be somewhat
> > > > > > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > > > > > modifying its own buffer descriptor).
> > > > > > >
> > > > > > > Thank you for highlighting this concern. The header layout differs
> > > > > > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > > > > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > > > > > would be as depicted in the diagram below,
> > > > > > >
> > > > > > >                       buf
> > > > > > >                        |
> > > > > > >                        v
> > > > > > >         +--------------+--------------+-------------+
> > > > > > >         | xdp headroom | virtio header| packet      |
> > > > > > >         | (256 bytes)  | (20 bytes)   | content     |
> > > > > > >         +--------------+--------------+-------------+
> > > > > > >         ^                             ^
> > > > > > >         |                             |
> > > > > > >  data_hard_start                    data
> > > > > > >                                   data_meta
> > > > > > >
> > > > > > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > > > > > towards 'data_hard_start', it would point to the inline header, thus
> > > > > > > potentially allowing the XDP program to access the inline header.
> > >
> > > Fairly late to the thread sorry. Given above layout does it make sense to
> > > just delay extraction to the kfunc as suggested above? Sure the XDP program
> > > could smash the entry in virtio header, but this is already the case for
> > > anything else there. A program writing over the virtio header is likely
> > > buggy anyways. Worst that might happen is bad rss values and mappings?
> >
> > Thank you for raising the concern. I am not quite sure if the XDP
> > program is considered buggy, as it is agnostic to the layout of the
> > inline header.
> > Let's say an XDP program calls bpf_xdp_adjust_meta to adjust data_meta
> > to point to the inline header and overwrites it without even knowing
> > of its existence. Later, when the XDP program invokes the kfunc to
> > retrieve the hash, incorrect data would be returned. In this case, the
> > XDP program seems to be doing everything legally but ends up with the
> > wrong hash data.
> >
> > Thanks,
> > Liang
> >
>
> I haven’t received any feedback yet, so I’m under the impression that
> the XDP program is still considered buggy in the scenario mentioned
> above, and the overall behavior is as designed from XDP perspective.
> Looking up the intel igc driver, it also does not bother with this
> particular aspect.

So let's post a new version with all the detailed explanations as above and see?

>
> Given this context, we don't need to be concerned about the hash value
> being overwritten. So if there aren't any objections, I plan to remove
> the preservation of the hash value in the next iteration.
>
> Thanks,
> Liang

Thanks

>
> > >
> > > I like seeing more use cases for the hints though.
> > >
> > > Thanks!
> > > John
>
Liang Chen April 11, 2024, 6:09 a.m. UTC | #15
On Mon, Apr 8, 2024 at 2:41 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Apr 1, 2024 at 11:38 AM Liang Chen <liangchen.linux@gmail.com> wrote:
> >
> > On Thu, Feb 29, 2024 at 4:37 PM Liang Chen <liangchen.linux@gmail.com> wrote:
> > >
> > > On Tue, Feb 27, 2024 at 4:42 AM John Fastabend <john.fastabend@gmail.com> wrote:
> > > >
> > > > Jason Wang wrote:
> > > > > On Fri, Feb 23, 2024 at 9:42 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > > > >
> > > > > > On Fri, 09 Feb 2024 13:57:25 +0100, Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > On Fri, 2024-02-09 at 18:39 +0800, Liang Chen wrote:
> > > > > > > > On Wed, Feb 7, 2024 at 10:27 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Wed, 2024-02-07 at 10:54 +0800, Liang Chen wrote:
> > > > > > > > > > On Tue, Feb 6, 2024 at 6:44 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Sat, 2024-02-03 at 10:56 +0800, Liang Chen wrote:
> > > > > > > > > > > > On Sat, Feb 3, 2024 at 12:20 AM Jesper Dangaard Brouer <hawk@kernel.org> wrote:
> > > > > > > > > > > > > On 02/02/2024 13.11, Liang Chen wrote:
> > > > > > > > > > > [...]
> > > > > > > > > > > > > > @@ -1033,6 +1039,16 @@ static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > > > > > > > > >       }
> > > > > > > > > > > > > >   }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > +static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
> > > > > > > > > > > > > > +                                  struct net_device *dev,
> > > > > > > > > > > > > > +                                  struct virtio_net_hdr_v1_hash *hdr_hash)
> > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > +     if (dev->features & NETIF_F_RXHASH) {
> > > > > > > > > > > > > > +             virtnet_xdp->hash_value = hdr_hash->hash_value;
> > > > > > > > > > > > > > +             virtnet_xdp->hash_report = hdr_hash->hash_report;
> > > > > > > > > > > > > > +     }
> > > > > > > > > > > > > > +}
> > > > > > > > > > > > > > +
> > > > > > > > > > > > >
> > > > > > > > > > > > > Would it be possible to store a pointer to hdr_hash in virtnet_xdp_buff,
> > > > > > > > > > > > > with the purpose of delaying extracting this, until and only if XDP
> > > > > > > > > > > > > bpf_prog calls the kfunc?
> > > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > That seems to be the way v1 works,
> > > > > > > > > > > > https://lore.kernel.org/all/20240122102256.261374-1-liangchen.linux@gmail.com/
> > > > > > > > > > > > . But it was pointed out that the inline header may be overwritten by
> > > > > > > > > > > > the xdp prog, so the hash is copied out to maintain its integrity.
> > > > > > > > > > >
> > > > > > > > > > > Why? isn't XDP supposed to get write access only to the pkt
> > > > > > > > > > > contents/buffer?
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Normally, an XDP program accesses only the packet data. However,
> > > > > > > > > > there's also an XDP RX Metadata area, referenced by the data_meta
> > > > > > > > > > pointer. This pointer can be adjusted with bpf_xdp_adjust_meta to
> > > > > > > > > > point somewhere ahead of the data buffer, thereby granting the XDP
> > > > > > > > > > program access to the virtio header located immediately before the
> > > > > > > > >
> > > > > > > > > AFAICS bpf_xdp_adjust_meta() does not allow moving the meta_data before
> > > > > > > > > xdp->data_hard_start:
> > > > > > > > >
> > > > > > > > > https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4210
> > > > > > > > >
> > > > > > > > > and virtio net set such field after the virtio_net_hdr:
> > > > > > > > >
> > > > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1218
> > > > > > > > > https://elixir.bootlin.com/linux/latest/source/drivers/net/virtio_net.c#L1420
> > > > > > > > >
> > > > > > > > > I don't see how the virtio hdr could be touched? Possibly even more
> > > > > > > > > > important: if such a thing is possible, I think it should be somewhat
> > > > > > > > > denied (for the same reason an H/W nic should prevent XDP from
> > > > > > > > > modifying its own buffer descriptor).
> > > > > > > >
> > > > > > > > Thank you for highlighting this concern. The header layout differs
> > > > > > > > slightly between small and mergeable mode. Taking 'mergeable mode' as
> > > > > > > > an example, after calling xdp_prepare_buff the layout of xdp_buff
> > > > > > > > would be as depicted in the diagram below,
> > > > > > > >
> > > > > > > >                       buf
> > > > > > > >                        |
> > > > > > > >                        v
> > > > > > > >         +--------------+--------------+-------------+
> > > > > > > >         | xdp headroom | virtio header| packet      |
> > > > > > > >         | (256 bytes)  | (20 bytes)   | content     |
> > > > > > > >         +--------------+--------------+-------------+
> > > > > > > >         ^                             ^
> > > > > > > >         |                             |
> > > > > > > >  data_hard_start                    data
> > > > > > > >                                   data_meta
> > > > > > > >
> > > > > > > > If 'bpf_xdp_adjust_meta' repositions the 'data_meta' pointer a little
> > > > > > > > towards 'data_hard_start', it would point to the inline header, thus
> > > > > > > > potentially allowing the XDP program to access the inline header.
> > > >
> > > > Fairly late to the thread sorry. Given above layout does it make sense to
> > > > just delay extraction to the kfunc as suggested above? Sure the XDP program
> > > > could smash the entry in virtio header, but this is already the case for
> > > > anything else there. A program writing over the virtio header is likely
> > > > buggy anyways. Worst that might happen is bad rss values and mappings?
> > >
> > > Thank you for raising the concern. I am not quite sure if the XDP
> > > program is considered buggy, as it is agnostic to the layout of the
> > > inline header.
> > > Let's say an XDP program calls bpf_xdp_adjust_meta to adjust data_meta
> > > to point to the inline header and overwrites it without even knowing
> > > of its existence. Later, when the XDP program invokes the kfunc to
> > > retrieve the hash, incorrect data would be returned. In this case, the
> > > XDP program seems to be doing everything legally but ends up with the
> > > wrong hash data.
> > >
> > > Thanks,
> > > Liang
> > >
> >
> > I haven’t received any feedback yet, so I’m under the impression that
> > the XDP program is still considered buggy in the scenario mentioned
> > above, and the overall behavior is as designed from XDP perspective.
> > Looking up the intel igc driver, it also does not bother with this
> > particular aspect.
>
> So let's post a new version with all the detailed explanations as above and see?

Sure. Thanks!
>
> >
> > Given this context, we don't need to be concerned about the hash value
> > being overwritten. So if there aren't any objections, I plan to remove
> > the preservation of the hash value in the next iteration.
> >
> > Thanks,
> > Liang
>
> Thanks
>
> >
> > > >
> > > > I like seeing more use cases for the hints though.
> > > >
> > > > Thanks!
> > > > John
> >
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d7ce4a1011ea..7ce666c86ee0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -349,6 +349,12 @@  struct virtio_net_common_hdr {
 	};
 };
 
+struct virtnet_xdp_buff { /* xdp_buff plus a private copy of the RX hash fields */
+	struct xdp_buff xdp; /* first member; virtnet_xdp_rx_hash() casts its ctx to this struct */
+	__le32 hash_value; /* copied from virtio_net_hdr_v1_hash.hash_value, still little-endian */
+	__le16 hash_report; /* copied from virtio_net_hdr_v1_hash.hash_report, still little-endian */
+};
+
 static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
 
 static bool is_xdp_frame(void *ptr)
@@ -1033,6 +1039,16 @@  static void put_xdp_frags(struct xdp_buff *xdp)
 	}
 }
 
+static void virtnet_xdp_save_rx_hash(struct virtnet_xdp_buff *virtnet_xdp,
+				     struct net_device *dev,
+				     struct virtio_net_hdr_v1_hash *hdr_hash)
+{ /* Copy the hash fields out of the virtio header into virtnet_xdp. */
+	if (dev->features & NETIF_F_RXHASH) { /* otherwise both fields are left untouched */
+		virtnet_xdp->hash_value = hdr_hash->hash_value; /* no byte swap here */
+		virtnet_xdp->hash_report = hdr_hash->hash_report; /* no byte swap here */
+	}
+}
+
 static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
 			       struct net_device *dev,
 			       unsigned int *xdp_xmit,
@@ -1199,9 +1215,10 @@  static struct sk_buff *receive_small_xdp(struct net_device *dev,
 	unsigned int headroom = vi->hdr_len + header_offset;
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
 	struct page *page = virt_to_head_page(buf);
+	struct virtnet_xdp_buff virtnet_xdp;
 	struct page *xdp_page;
+	struct xdp_buff *xdp;
 	unsigned int buflen;
-	struct xdp_buff xdp;
 	struct sk_buff *skb;
 	unsigned int metasize = 0;
 	u32 act;
@@ -1233,17 +1250,20 @@  static struct sk_buff *receive_small_xdp(struct net_device *dev,
 		page = xdp_page;
 	}
 
-	xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
-	xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
+	xdp = &virtnet_xdp.xdp;
+	xdp_init_buff(xdp, buflen, &rq->xdp_rxq);
+	xdp_prepare_buff(xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
 			 xdp_headroom, len, true);
 
-	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
+	virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
+
+	act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
 
 	switch (act) {
 	case XDP_PASS:
 		/* Recalculate length in case bpf program changed it */
-		len = xdp.data_end - xdp.data;
-		metasize = xdp.data - xdp.data_meta;
+		len = xdp->data_end - xdp->data;
+		metasize = xdp->data - xdp->data_meta;
 		break;
 
 	case XDP_TX:
@@ -1254,7 +1274,7 @@  static struct sk_buff *receive_small_xdp(struct net_device *dev,
 		goto err_xdp;
 	}
 
-	skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
+	skb = virtnet_build_skb(buf, buflen, xdp->data - buf, len);
 	if (unlikely(!skb))
 		goto err;
 
@@ -1591,10 +1611,11 @@  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
 	int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
+	struct virtnet_xdp_buff virtnet_xdp;
 	unsigned int xdp_frags_truesz = 0;
 	struct sk_buff *head_skb;
 	unsigned int frame_sz;
-	struct xdp_buff xdp;
+	struct xdp_buff *xdp;
 	void *data;
 	u32 act;
 	int err;
@@ -1604,16 +1625,19 @@  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
 	if (unlikely(!data))
 		goto err_xdp;
 
-	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
+	xdp = &virtnet_xdp.xdp;
+	err = virtnet_build_xdp_buff_mrg(dev, vi, rq, xdp, data, len, frame_sz,
 					 &num_buf, &xdp_frags_truesz, stats);
 	if (unlikely(err))
 		goto err_xdp;
 
-	act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
+	virtnet_xdp_save_rx_hash(&virtnet_xdp, dev, (void *)hdr);
+
+	act = virtnet_xdp_handler(xdp_prog, xdp, dev, xdp_xmit, stats);
 
 	switch (act) {
 	case XDP_PASS:
-		head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+		head_skb = build_skb_from_xdp_buff(dev, vi, xdp, xdp_frags_truesz);
 		if (unlikely(!head_skb))
 			break;
 		return head_skb;
@@ -1626,7 +1650,7 @@  static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
 		break;
 	}
 
-	put_xdp_frags(&xdp);
+	put_xdp_frags(xdp);
 
 err_xdp:
 	put_page(page);
@@ -4579,6 +4603,55 @@  static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
 	}
 }
 
+static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+			       enum xdp_rss_hash_type *rss_type)
+{ /* xmo_rx_hash callback: report the saved hash value and its type. */
+	const struct virtnet_xdp_buff *virtnet_xdp = (void *)_ctx; /* ctx is treated as the enclosing virtnet_xdp_buff */
+
+	if (!(virtnet_xdp->xdp.rxq->dev->features & NETIF_F_RXHASH))
+		return -ENODATA; /* same flag gates virtnet_xdp_save_rx_hash(); nothing is saved when clear */
+
+	switch (__le16_to_cpu(virtnet_xdp->hash_report)) { /* map virtio report type to XDP RSS type */
+	case VIRTIO_NET_HASH_REPORT_TCPv4:
+		*rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
+		break;
+	case VIRTIO_NET_HASH_REPORT_UDPv4:
+		*rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
+		break;
+	case VIRTIO_NET_HASH_REPORT_TCPv6:
+		*rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
+		break;
+	case VIRTIO_NET_HASH_REPORT_UDPv6:
+		*rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
+		break;
+	case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
+		*rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
+		break;
+	case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
+		*rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
+		break;
+	case VIRTIO_NET_HASH_REPORT_IPv4:
+		*rss_type = XDP_RSS_TYPE_L3_IPV4;
+		break;
+	case VIRTIO_NET_HASH_REPORT_IPv6:
+		*rss_type = XDP_RSS_TYPE_L3_IPV6;
+		break;
+	case VIRTIO_NET_HASH_REPORT_IPv6_EX:
+		*rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
+		break;
+	case VIRTIO_NET_HASH_REPORT_NONE:
+	default: /* unrecognised report values are handled like REPORT_NONE */
+		*rss_type = XDP_RSS_TYPE_NONE;
+	}
+
+	*hash = __le32_to_cpu(virtnet_xdp->hash_value); /* saved value is little-endian; convert to CPU order */
+	return 0; /* 0 is returned even when *rss_type is XDP_RSS_TYPE_NONE */
+}
+
+static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { /* assigned to dev->xdp_metadata_ops in virtnet_probe() */
+	.xmo_rx_hash			= virtnet_xdp_rx_hash, /* the only callback this table sets */
+};
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err = -ENOMEM;
@@ -4704,6 +4777,7 @@  static int virtnet_probe(struct virtio_device *vdev)
 				  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
 
 		dev->hw_features |= NETIF_F_RXHASH;
+		dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
 	}
 
 	if (vi->has_rss_hash_report)