diff mbox series

[v3,bpf-next,1/3] bpf: Support setting variable-length tunnel options

Message ID 20220822052152.378622-2-shmulik.ladkani@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support setting variable-length tunnel options | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on ubuntu-latest with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Kernel LATEST on z15 with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 105798 this patch: 105798
netdev/cc_maintainers warning 13 maintainers not CCed: john.fastabend@gmail.com song@kernel.org sdf@google.com martin.lau@linux.dev davem@davemloft.net netdev@vger.kernel.org edumazet@google.com kpsingh@kernel.org kuba@kernel.org jolsa@kernel.org pabeni@redhat.com haoluo@google.com yhs@fb.com
netdev/build_clang success Errors and warnings before: 173 this patch: 173
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 107672 this patch: 107672
netdev/checkpatch warning WARNING: From:/Signed-off-by: email address mismatch: 'From: Shmulik Ladkani <shmulik@metanetworks.com>' != 'Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>' WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Shmulik Ladkani Aug. 22, 2022, 5:21 a.m. UTC
Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given
an option buffer (ARG_PTR_TO_MEM|MEM_RDONLY) and the compile-time
fixed buffer size (ARG_CONST_SIZE).

However, in certain cases we wish to set tunnel options of dynamic
length.

For example, we have an ebpf program that gets geneve options on
incoming packets, stores them into a map (using a key representing
the incoming flow), and later needs to assign *same* options to
reply packets (belonging to same flow).

This is currently imposssibly without knowing sender's exact geneve
options length, which unfortunately is dymamic.

Introduce 'skb_set_var_tunnel_opt'. This is a variant of
'bpf_skb_set_tunnel_opt' which gets an *additional* parameter 'len',
which is the byte length from 'opt' buffer to copy into ip_tunnnel_info.

The 'size' parameter is kept ARG_CONST_SIZE. This way, verifier can still
safe-guard buffer access. 'len' must never exceed 'size', o/w EINVAL is
returned.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
---
v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function
---
 include/uapi/linux/bpf.h       | 12 ++++++++++++
 net/core/filter.c              | 34 +++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h | 12 ++++++++++++
 3 files changed, 55 insertions(+), 3 deletions(-)

Comments

John Fastabend Aug. 23, 2022, 7:59 a.m. UTC | #1
Shmulik Ladkani wrote:
> Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given
> an option buffer (ARG_PTR_TO_MEM|MEM_RDONLY) and the compile-time
> fixed buffer size (ARG_CONST_SIZE).
> 
> However, in certain cases we wish to set tunnel options of dynamic
> length.
> 
> For example, we have an ebpf program that gets geneve options on
> incoming packets, stores them into a map (using a key representing
> the incoming flow), and later needs to assign *same* options to
> reply packets (belonging to same flow).
> 
> This is currently imposssibly without knowing sender's exact geneve
> options length, which unfortunately is dymamic.
> 
> Introduce 'skb_set_var_tunnel_opt'. This is a variant of
> 'bpf_skb_set_tunnel_opt' which gets an *additional* parameter 'len',
> which is the byte length from 'opt' buffer to copy into ip_tunnnel_info.
> 
> The 'size' parameter is kept ARG_CONST_SIZE. This way, verifier can still
> safe-guard buffer access. 'len' must never exceed 'size', o/w EINVAL is
> returned.
> 
> Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> ---
> v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function
> ---
>  include/uapi/linux/bpf.h       | 12 ++++++++++++
>  net/core/filter.c              | 34 +++++++++++++++++++++++++++++++---
>  tools/include/uapi/linux/bpf.h | 12 ++++++++++++
>  3 files changed, 55 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 934a2a8beb87..1b965dfd0c80 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5355,6 +5355,17 @@ union bpf_attr {
>   *	Return
>   *		Current *ktime*.
>   *
> + * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
> + *	Description
> + *		Set tunnel options metadata for the packet associated to *skb*
> + *		to the variable length *len* bytes of option data contained in
> + *		the raw buffer *opt* sized *size*.
> + *
> + *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> + *		helper for additional information.
> + *	Return
> + *		0 on success, or a negative error in case of failure.

This API feels akward to me. Could you collapse this by using a dynamic pointer,
recently added? And drop the ptr_to_mem+const_size part at least? That seems
redundant with latest kernels.

And then is there a case where size != len? Probably I guess? Anyways having
a signature like tunnel_otpion(skb, opt, len) looks a lot like memcpy to me
and feels familiar.

[...]

>  
> +static const struct bpf_func_proto bpf_skb_set_var_tunnel_opt_proto = {
> +	.func		= bpf_skb_set_var_tunnel_opt,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
> +	.arg3_type	= ARG_CONST_SIZE,
> +	.arg4_type	= ARG_ANYTHING,
> +};
> +
Shmulik Ladkani Aug. 23, 2022, 9:47 a.m. UTC | #2
On Tue, 23 Aug 2022 00:59:01 -0700
John Fastabend <john.fastabend@gmail.com> wrote:

> This API feels akward to me. Could you collapse this by using a dynamic pointer,
> recently added? And drop the ptr_to_mem+const_size part at least? That seems
> redundant with latest kernels.

Hoo, nice. Wasn't aware of this new bpf_dynptr_kern, thanks!
Will resubmit with a signature that gets 'opts' as ARG_PTR_TO_DYNPTR.

Best
Shmulik
Shmulik Ladkani Aug. 31, 2022, 8:34 a.m. UTC | #3
On Tue, 23 Aug 2022 00:59:01 -0700
John Fastabend <john.fastabend@gmail.com> wrote:

> > + * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
> > + *	Description
> > + *		Set tunnel options metadata for the packet associated to *skb*
> > + *		to the variable length *len* bytes of option data contained in
> > + *		the raw buffer *opt* sized *size*.
> > + *
> > + *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> > + *		helper for additional information.
> > + *	Return
> > + *		0 on success, or a negative error in case of failure.  
> 
> This API feels akward to me. Could you collapse this by using a dynamic pointer,
> recently added? And drop the ptr_to_mem+const_size part at least? That seems
> redundant with latest kernels.

Revisiting this decision.

After following that path, seems to me that adding the newly proposed
'bpf_skb_set_tunnel_opt_dynptr' API creates awkwardness in user's bpf
program.

Suppose user needs to hold a map of the options received on incoming
traffic based on whatever 'bpf_skb_get_tunnel_opt' returns.

Then, when user needs to apply the options on the return traffic, we
have the following two alternative APIs:


option A: bpf_skb_set_var_tunnel_opt
------------------------------------

struct tun_opts {
    __u8 data[MAX_OPT_SZ];
    __u32 len;
};
BPF_MAP_TYPE_HASH opts_map; // __type(value, tun_opts)

  ...

  struct tun_opts *opts;

  opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
  bpf_skb_set_var_tunnel_opt(skb, opts->data, sizeof(opts->data), opts->len);


option B: bpf_skb_set_tunnel_opt_dynptr
---------------------------------------

struct tun_opts {
    __u8 data[MAX_OPT_SZ];
};
BPF_MAP_TYPE_HASH opts_map;       // __type(value, tun_opts)
BPF_MAP_TYPE_HASH opts_len_map;   // __type(value, __u32)

  ... 

  struct bpf_dynptr dptr;
  struct tun_opts *opts;
  __u32 *opts_len;

  opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
  opts_len = bpf_map_lookup_elem(&opts_len_map, &the_flow_key);

  bpf_dynptr_from_mem(opts, sizeof(*opts), 0, &dptr);  // construct a dynptr from the raw option data
  bpf_dynptr_trim(&dptr, opts_len);                    // trim it based on stored option len
  bpf_skb_set_tunnel_opt_dynptr(skb, &dptr);


IMO, the 2nd user program is less readable:
 - need to store the received options length in a separate map
 - 5 bpf function calls instead of 2

Despite the awkwardness of the 'bpf_skb_set_var_tunnel_opt' API (passing
both constant size *and* dynamic len), it really creates more simple and
readable ebpf programs.

WDYT?

Best
Shmulik
Joanne Koong Aug. 31, 2022, 7:07 p.m. UTC | #4
On Wed, Aug 31, 2022 at 1:44 AM Shmulik Ladkani
<shmulik@metanetworks.com> wrote:
>
> On Tue, 23 Aug 2022 00:59:01 -0700
> John Fastabend <john.fastabend@gmail.com> wrote:
>
> > > + * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
> > > + * Description
> > > + *         Set tunnel options metadata for the packet associated to *skb*
> > > + *         to the variable length *len* bytes of option data contained in
> > > + *         the raw buffer *opt* sized *size*.
> > > + *
> > > + *         See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> > > + *         helper for additional information.
> > > + * Return
> > > + *         0 on success, or a negative error in case of failure.
> >
> > This API feels akward to me. Could you collapse this by using a dynamic pointer,
> > recently added? And drop the ptr_to_mem+const_size part at least? That seems
> > redundant with latest kernels.
>
> Revisiting this decision.
>
> After following that path, seems to me that adding the newly proposed
> 'bpf_skb_set_tunnel_opt_dynptr' API creates awkwardness in user's bpf
> program.
>
> Suppose user needs to hold a map of the options received on incoming
> traffic based on whatever 'bpf_skb_get_tunnel_opt' returns.
>
> Then, when user needs to apply the options on the return traffic, we
> have the following two alternative APIs:
>
>
> option A: bpf_skb_set_var_tunnel_opt
> ------------------------------------
>
> struct tun_opts {
>     __u8 data[MAX_OPT_SZ];
>     __u32 len;
> };
> BPF_MAP_TYPE_HASH opts_map; // __type(value, tun_opts)
>
>   ...
>
>   struct tun_opts *opts;
>
>   opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
>   bpf_skb_set_var_tunnel_opt(skb, opts->data, sizeof(opts->data), opts->len);
>
>
> option B: bpf_skb_set_tunnel_opt_dynptr
> ---------------------------------------
>
> struct tun_opts {
>     __u8 data[MAX_OPT_SZ];
> };
> BPF_MAP_TYPE_HASH opts_map;       // __type(value, tun_opts)
> BPF_MAP_TYPE_HASH opts_len_map;   // __type(value, __u32)
>
>   ...
>
>   struct bpf_dynptr dptr;
>   struct tun_opts *opts;
>   __u32 *opts_len;
>
>   opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
>   opts_len = bpf_map_lookup_elem(&opts_len_map, &the_flow_key);
>
>   bpf_dynptr_from_mem(opts, sizeof(*opts), 0, &dptr);  // construct a dynptr from the raw option data
>   bpf_dynptr_trim(&dptr, opts_len);                    // trim it based on stored option len
>   bpf_skb_set_tunnel_opt_dynptr(skb, &dptr);
>
>
> IMO, the 2nd user program is less readable:
>  - need to store the received options length in a separate map
>  - 5 bpf function calls instead of 2

I don't think you need a separate map to store the opts length. I
think you can store

struct tun_opts {
     __u8 data[MAX_OPT_SZ];
     __u32 len;
};

in the map, and then do something like:

struct bpf_dynptr ptr;
struct tun_opts *opts;

opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
bpf_skb_set_tunnel_opt_dynptr(skb, &ptr);

where inside the bpf_skb_set_tunnel_opt_dynptr() helper, you can parse
the dynptr data back to a "struct tun_opts" (after doing requisite
checks, eg making sure dynptr size matches sizeof(struct tun_opts))
and then access tun_opts->data and tun_opts->len accordingly.

What are your thoughts?

>
> Despite the awkwardness of the 'bpf_skb_set_var_tunnel_opt' API (passing
> both constant size *and* dynamic len), it really creates more simple and
> readable ebpf programs.
>
> WDYT?
>
> Best
> Shmulik
>
Shmulik Ladkani Aug. 31, 2022, 7:40 p.m. UTC | #5
On Wed, 31 Aug 2022 12:07:28 -0700
Joanne Koong <joannelkoong@gmail.com> wrote:

> > option B: bpf_skb_set_tunnel_opt_dynptr
> > ---------------------------------------
> >
> > struct tun_opts {
> >     __u8 data[MAX_OPT_SZ];
> > };
> > BPF_MAP_TYPE_HASH opts_map;       // __type(value, tun_opts)
> > BPF_MAP_TYPE_HASH opts_len_map;   // __type(value, __u32)
> >
> >   ...
> >
> >   struct bpf_dynptr dptr;
> >   struct tun_opts *opts;
> >   __u32 *opts_len;
> >
> >   opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
> >   opts_len = bpf_map_lookup_elem(&opts_len_map, &the_flow_key);
> >
> >   bpf_dynptr_from_mem(opts, sizeof(*opts), 0, &dptr);  // construct a dynptr from the raw option data
> >   bpf_dynptr_trim(&dptr, opts_len);                    // trim it based on stored option len
> >   bpf_skb_set_tunnel_opt_dynptr(skb, &dptr);
> >
> >
> > IMO, the 2nd user program is less readable:
> >  - need to store the received options length in a separate map
> >  - 5 bpf function calls instead of 2  
> 
> I don't think you need a separate map to store the opts length.

You are correct, I was under the impression that 'bpf_dynptr_from_mem'
will *only* accept the exact pointer to the map element, due to the
verifier checking this:

	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",

However I found out this is also allowed by the verifier:

struct tun_opts {
    __u32 len;
    __u8 data[MAX_OPT_SZ];
};

  opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
  bpf_dynptr_from_mem(opts->data, sizeof(opts->data), 0, &dptr);
  bpf_dynptr_trim(&dptr, opts->len);
  bpf_skb_set_tunnel_opt_dynptr(skb, &dptr);

Nevertheless, IMO it doesn't change the argument about the better
readability and simplicity of the "bpf_skb_set_var_tunnel_opt" approach:

  opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
  bpf_skb_set_var_tunnel_opt(skb, opts->data, sizeof(opts->data), opts->len);

which is more straight forward.
                       
It even allows the bpf program to set tunnel options *without* using a
map element at all, e.g. this usecase:

  __u8 data[MAX_OPT_SZ];

  len = bpf_skb_get_tunnel_opt(skb, data, sizeof(data));
  ...
  // fiddle with the skb, the tunnel key, the tunnel remote, or whatever.
  // then send again on a collect-md interface, setting same tunnel
  // opts as seen.
  ...
  bpf_skb_set_var_tunnel_opt(skb, data, sizeof(data), len);
  bpf_redirect()

Best,
Shmulik
Shmulik Ladkani Sept. 2, 2022, 3:51 p.m. UTC | #6
> On Tue, 23 Aug 2022 00:59:01 -0700
> John Fastabend <john.fastabend@gmail.com> wrote:
> 
> > > + * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
> > > + *	Description
> > > + *		Set tunnel options metadata for the packet associated to *skb*
> > > + *		to the variable length *len* bytes of option data contained in
> > > + *		the raw buffer *opt* sized *size*.
> > > + *
> > > + *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> > > + *		helper for additional information.
> > > + *	Return
> > > + *		0 on success, or a negative error in case of failure.    
> > 
> > This API feels akward to me. Could you collapse this by using a dynamic pointer,
> > recently added? And drop the ptr_to_mem+const_size part at least? That seems
> > redundant with latest kernels.  
> 
> Revisiting this decision.
> 
> After following that path, seems to me that adding the newly proposed
> 'bpf_skb_set_tunnel_opt_dynptr' API creates awkwardness in user's bpf
> program.
> 
> Suppose user needs to hold a map of the options received on incoming
> traffic based on whatever 'bpf_skb_get_tunnel_opt' returns.
> 
> Then, when user needs to apply the options on the return traffic, we
> have the following two alternative APIs:
> 
> 
> option A: bpf_skb_set_var_tunnel_opt
> ------------------------------------
> 
> struct tun_opts {
>     __u8 data[MAX_OPT_SZ];
>     __u32 len;
> };
> BPF_MAP_TYPE_HASH opts_map; // __type(value, tun_opts)
> 
>   ...
> 
>   struct tun_opts *opts;
> 
>   opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
>   bpf_skb_set_var_tunnel_opt(skb, opts->data, sizeof(opts->data), opts->len);
> 
> 
> option B: bpf_skb_set_tunnel_opt_dynptr
> ---------------------------------------
> 
> struct tun_opts {
>     __u8 data[MAX_OPT_SZ];
> };
> BPF_MAP_TYPE_HASH opts_map;       // __type(value, tun_opts)
> BPF_MAP_TYPE_HASH opts_len_map;   // __type(value, __u32)
> 
>   ... 
> 
>   struct bpf_dynptr dptr;
>   struct tun_opts *opts;
>   __u32 *opts_len;
> 
>   opts = bpf_map_lookup_elem(&opts_map, &the_flow_key);
>   opts_len = bpf_map_lookup_elem(&opts_len_map, &the_flow_key);
> 
>   bpf_dynptr_from_mem(opts, sizeof(*opts), 0, &dptr);  // construct a dynptr from the raw option data
>   bpf_dynptr_trim(&dptr, opts_len);                    // trim it based on stored option len
>   bpf_skb_set_tunnel_opt_dynptr(skb, &dptr);
> 
> 
> IMO, the 2nd user program is less readable:
>  - need to store the received options length in a separate map
>  - 5 bpf function calls instead of 2
> 
> Despite the awkwardness of the 'bpf_skb_set_var_tunnel_opt' API (passing
> both constant size *and* dynamic len), it really creates more simple and
> readable ebpf programs.
> 
> WDYT?

John, Daniel, would appreciate your opinion re the prefered BPF API we
add to set var-length tunnel options. See the difference in bpf user
programs based on the 2 suggestions above.

Thanks,
Shmulik
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 934a2a8beb87..1b965dfd0c80 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5355,6 +5355,17 @@  union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the variable length *len* bytes of option data contained in
+ *		the raw buffer *opt* sized *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5566,6 +5577,7 @@  union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(skb_set_var_tunnel_opt),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 1acfaffeaf32..02161a3344fd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4669,8 +4669,8 @@  static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
-	   const u8 *, from, u32, size)
+static u64 __bpf_skb_set_tunopt(struct sk_buff *skb,
+				const u8 *from, u32 size, u32 len)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 	const struct metadata_dst *md = this_cpu_ptr(md_dst);
@@ -4679,12 +4679,26 @@  BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
 		return -EINVAL;
 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
 		return -ENOMEM;
+	if (unlikely(len > size))
+		return -EINVAL;
 
-	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
+	ip_tunnel_info_opts_set(info, from, len, TUNNEL_OPTIONS_PRESENT);
 
 	return 0;
 }
 
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+	   const u8 *, from, u32, size)
+{
+	return __bpf_skb_set_tunopt(skb, from, size, size);
+}
+
+BPF_CALL_4(bpf_skb_set_var_tunnel_opt, struct sk_buff *, skb,
+	   const u8 *, from, u32, size, u32, len)
+{
+	return __bpf_skb_set_tunopt(skb, from, size, len);
+}
+
 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.func		= bpf_skb_set_tunnel_opt,
 	.gpl_only	= false,
@@ -4694,6 +4708,16 @@  static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
+static const struct bpf_func_proto bpf_skb_set_var_tunnel_opt_proto = {
+	.func		= bpf_skb_set_var_tunnel_opt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 {
@@ -4714,6 +4738,8 @@  bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 		return &bpf_skb_set_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
 		return &bpf_skb_set_tunnel_opt_proto;
+	case BPF_FUNC_skb_set_var_tunnel_opt:
+		return &bpf_skb_set_var_tunnel_opt_proto;
 	default:
 		return NULL;
 	}
@@ -7826,6 +7852,7 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_get_tunnel_opt:
 		return &bpf_skb_get_tunnel_opt_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
+	case BPF_FUNC_skb_set_var_tunnel_opt:
 		return bpf_get_skb_set_tunnel_proto(func_id);
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
@@ -8169,6 +8196,7 @@  lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_get_tunnel_opt:
 		return &bpf_skb_get_tunnel_opt_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
+	case BPF_FUNC_skb_set_var_tunnel_opt:
 		return bpf_get_skb_set_tunnel_proto(func_id);
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1d6085e15fc8..1a1083db5b7a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5355,6 +5355,17 @@  union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the variable length *len* bytes of option data contained in
+ *		the raw buffer *opt* sized *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5566,6 +5577,7 @@  union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(skb_set_var_tunnel_opt),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper