diff mbox series

[bpf-next,1/3] bpf: Support setting variable-length tunnel options

Message ID 20220821161740.166682-2-shmulik.ladkani@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support setting variable-length tunnel options | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 105798 this patch: 105798
netdev/cc_maintainers warning 13 maintainers not CCed: john.fastabend@gmail.com song@kernel.org sdf@google.com martin.lau@linux.dev davem@davemloft.net netdev@vger.kernel.org edumazet@google.com kpsingh@kernel.org kuba@kernel.org jolsa@kernel.org pabeni@redhat.com haoluo@google.com yhs@fb.com
netdev/build_clang success Errors and warnings before: 173 this patch: 173
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 107672 this patch: 107672
netdev/checkpatch warning WARNING: From:/Signed-off-by: email address mismatch: 'From: Shmulik Ladkani <shmulik@metanetworks.com>' != 'Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>' WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline fail Was 0 now: 1
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on ubuntu-latest with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Kernel LATEST on z15 with gcc

Commit Message

Shmulik Ladkani Aug. 21, 2022, 4:17 p.m. UTC
Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given
an option buffer (ARG_PTR_TO_MEM|MEM_RDONLY) and the compile-time
fixed buffer size (ARG_CONST_SIZE).

However, in certain cases we wish to set tunnel options of dynamic
length.

For example, we have an eBPF program that gets geneve options on
incoming packets, stores them into a map (using a key representing
the incoming flow), and later needs to assign the *same* options to
reply packets (belonging to the same flow).

This is currently impossible without knowing the sender's exact geneve
options length, which unfortunately is dynamic.

Introduce 'bpf_skb_set_var_tunnel_opt'. This is a variant of
'bpf_skb_set_tunnel_opt' which gets an *additional* parameter 'len',
which is the byte length from 'opt' buffer to copy into ip_tunnel_info.

The 'size' parameter is kept ARG_CONST_SIZE. This way, the verifier can still
safeguard buffer access. 'len' must never exceed 'size'; otherwise -EINVAL is
returned.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
---
 include/uapi/linux/bpf.h       | 12 ++++++++++++
 net/core/filter.c              | 34 +++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h | 12 ++++++++++++
 3 files changed, 55 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 934a2a8beb87..1b965dfd0c80 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5355,6 +5355,17 @@  union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the variable length *len* bytes of option data contained in
+ *		the raw buffer *opt* sized *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5566,6 +5577,7 @@  union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(skb_set_var_tunnel_opt),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 1acfaffeaf32..894d780aff16 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4669,8 +4669,8 @@  static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
-	   const u8 *, from, u32, size)
+static inline u64 __bpf_skb_set_tunopt(struct sk_buff *skb,
+				       const u8 *from, u32 size, u32 len)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 	const struct metadata_dst *md = this_cpu_ptr(md_dst);
@@ -4679,12 +4679,26 @@  BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
 		return -EINVAL;
 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
 		return -ENOMEM;
+	if (unlikely(len > size))
+		return -EINVAL;
 
-	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
+	ip_tunnel_info_opts_set(info, from, len, TUNNEL_OPTIONS_PRESENT);
 
 	return 0;
 }
 
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+	   const u8 *, from, u32, size)
+{
+	return __bpf_skb_set_tunopt(skb, from, size, size);
+}
+
+BPF_CALL_4(bpf_skb_set_var_tunnel_opt, struct sk_buff *, skb,
+	   const u8 *, from, u32, size, u32, len)
+{
+	return __bpf_skb_set_tunopt(skb, from, size, len);
+}
+
 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.func		= bpf_skb_set_tunnel_opt,
 	.gpl_only	= false,
@@ -4694,6 +4708,16 @@  static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
+static const struct bpf_func_proto bpf_skb_set_var_tunnel_opt_proto = {
+	.func		= bpf_skb_set_var_tunnel_opt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 {
@@ -4714,6 +4738,8 @@  bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 		return &bpf_skb_set_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
 		return &bpf_skb_set_tunnel_opt_proto;
+	case BPF_FUNC_skb_set_var_tunnel_opt:
+		return &bpf_skb_set_var_tunnel_opt_proto;
 	default:
 		return NULL;
 	}
@@ -7826,6 +7852,7 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_get_tunnel_opt:
 		return &bpf_skb_get_tunnel_opt_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
+	case BPF_FUNC_skb_set_var_tunnel_opt:
 		return bpf_get_skb_set_tunnel_proto(func_id);
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
@@ -8169,6 +8196,7 @@  lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_get_tunnel_opt:
 		return &bpf_skb_get_tunnel_opt_proto;
 	case BPF_FUNC_skb_set_tunnel_opt:
+	case BPF_FUNC_skb_set_var_tunnel_opt:
 		return bpf_get_skb_set_tunnel_proto(func_id);
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1d6085e15fc8..1a1083db5b7a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5355,6 +5355,17 @@  union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_skb_set_var_tunnel_opt(struct sk_buff *skb, void *opt, u32 size, u32 len)
+ *	Description
+ *		Set tunnel options metadata for the packet associated to *skb*
+ *		to the variable length *len* bytes of option data contained in
+ *		the raw buffer *opt* sized *size*.
+ *
+ *		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ *		helper for additional information.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5566,6 +5577,7 @@  union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(skb_set_var_tunnel_opt),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper