diff mbox series

[1/1] net: tun: add XDP metadata support

Message ID 20250130171614.1657224-2-marcus.wichelmann@hetzner-cloud.de (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series XDP metadata support for tun driver | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 82 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-01-30--18-00 (tests: 885)

Commit Message

Marcus Wichelmann Jan. 30, 2025, 5:16 p.m. UTC
Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
by the tun driver. This is useful to pass metadata from an XDP program
that's attached to a tap device to following XDP/TC programs.

When used together with vhost_net, the batched XDP buffers were already
initialized with metadata support by the vhost_net driver, but the
metadata was not yet passed to the skb on XDP_PASS. So this also adds
the required skb_metadata_set calls.

Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
---
 drivers/net/tun.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

Comments

Stanislav Fomichev Jan. 30, 2025, 11:16 p.m. UTC | #1
On 01/30, Marcus Wichelmann wrote:
> Enable the support for bpf_xdp_adjust_meta for XDP buffers initialized
> by the tun driver. This is useful to pass metadata from an XDP program
> that's attached to a tap device to following XDP/TC programs.
> 
> When used together with vhost_net, the batched XDP buffers were already
> initialized with metadata support by the vhost_net driver, but the
> metadata was not yet passed to the skb on XDP_PASS. So this also adds
> the required skb_metadata_set calls.

Can you expand more on what kind of metadata is present with vhost_net
and who fills it in? Is it virtio header stuff? I wonder how you
want to consume it..

Can you also add a selftest to use this new functionality?

> Signed-off-by: Marcus Wichelmann <marcus.wichelmann@hetzner-cloud.de>
> ---
>  drivers/net/tun.c | 23 ++++++++++++++++++-----
>  1 file changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index e816aaba8..d3cfea40a 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1600,7 +1600,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>  
>  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  				       struct page_frag *alloc_frag, char *buf,
> -				       int buflen, int len, int pad)
> +				       int buflen, int len, int pad,
> +				       int metasize)
>  {
>  	struct sk_buff *skb = build_skb(buf, buflen);
>  
> @@ -1609,6 +1610,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
>  
>  	skb_reserve(skb, pad);
>  	skb_put(skb, len);
> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
>  	skb_set_owner_w(skb, tfile->socket.sk);
>  
>  	get_page(alloc_frag->page);
> @@ -1668,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	char *buf;
>  	size_t copied;
>  	int pad = TUN_RX_PAD;
> +	int metasize = 0;
>  	int err = 0;
>  
>  	rcu_read_lock();
> @@ -1695,7 +1699,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  	if (hdr->gso_type || !xdp_prog) {
>  		*skb_xdp = 1;
>  		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
> -				       pad);
> +				       pad, metasize);
>  	}
>  
>  	*skb_xdp = 0;
> @@ -1709,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  		u32 act;
>  
>  		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
> -		xdp_prepare_buff(&xdp, buf, pad, len, false);
> +		xdp_prepare_buff(&xdp, buf, pad, len, true);
>  
>  		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>  		if (act == XDP_REDIRECT || act == XDP_TX) {
> @@ -1730,12 +1734,16 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>  
>  		pad = xdp.data - xdp.data_hard_start;
>  		len = xdp.data_end - xdp.data;
> +
> +		metasize = xdp.data - xdp.data_meta;

[..]

> +		metasize = metasize > 0 ? metasize : 0;

Why is this part needed?
diff mbox series

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e816aaba8..d3cfea40a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1600,7 +1600,8 @@  static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
 
 static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 				       struct page_frag *alloc_frag, char *buf,
-				       int buflen, int len, int pad)
+				       int buflen, int len, int pad,
+				       int metasize)
 {
 	struct sk_buff *skb = build_skb(buf, buflen);
 
@@ -1609,6 +1610,8 @@  static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
 
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 	skb_set_owner_w(skb, tfile->socket.sk);
 
 	get_page(alloc_frag->page);
@@ -1668,6 +1671,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	char *buf;
 	size_t copied;
 	int pad = TUN_RX_PAD;
+	int metasize = 0;
 	int err = 0;
 
 	rcu_read_lock();
@@ -1695,7 +1699,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	if (hdr->gso_type || !xdp_prog) {
 		*skb_xdp = 1;
 		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
-				       pad);
+				       pad, metasize);
 	}
 
 	*skb_xdp = 0;
@@ -1709,7 +1713,7 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		u32 act;
 
 		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
-		xdp_prepare_buff(&xdp, buf, pad, len, false);
+		xdp_prepare_buff(&xdp, buf, pad, len, true);
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -1730,12 +1734,16 @@  static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 
 		pad = xdp.data - xdp.data_hard_start;
 		len = xdp.data_end - xdp.data;
+
+		metasize = xdp.data - xdp.data_meta;
+		metasize = metasize > 0 ? metasize : 0;
 	}
 	bpf_net_ctx_clear(bpf_net_ctx);
 	rcu_read_unlock();
 	local_bh_enable();
 
-	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
+	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
+			       metasize);
 
 out:
 	bpf_net_ctx_clear(bpf_net_ctx);
@@ -2452,6 +2460,7 @@  static int tun_xdp_one(struct tun_struct *tun,
 	struct sk_buff_head *queue;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
+	int metasize = 0;
 	int ret = 0;
 	bool skb_xdp = false;
 	struct page *page;
@@ -2467,7 +2476,6 @@  static int tun_xdp_one(struct tun_struct *tun,
 		}
 
 		xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
-		xdp_set_data_meta_invalid(xdp);
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
 		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
@@ -2507,6 +2515,11 @@  static int tun_xdp_one(struct tun_struct *tun,
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	skb_put(skb, xdp->data_end - xdp->data);
 
+	metasize = xdp->data - xdp->data_meta;
+	metasize = metasize > 0 ? metasize : 0;
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
 	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
 		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);