Message ID | 20220911122328.306188-3-shmulik.ladkani@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | bpf: Support setting variable-length tunnel options | expand |
On Sun, Sep 11, 2022 at 5:23 AM Shmulik Ladkani <shmulik@metanetworks.com> wrote: > > Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given > an option buffer (ARG_PTR_TO_MEM) and the compile-time fixed buffer > size (ARG_CONST_SIZE). > > However, in certain cases we wish to set tunnel options of dynamic > length. > > For example, we have an ebpf program that gets geneve options on > incoming packets, stores them into a map (using a key representing > the incoming flow), and later needs to assign *same* options to > reply packets (belonging to same flow). > > This is currently imposssible without knowing sender's exact geneve > options length, which unfortunately is dymamic. > > Introduce 'bpf_skb_set_tunnel_opt_dynptr'. > > This is a variant of 'bpf_skb_set_tunnel_opt' which gets a bpf dynamic > pointer (ARG_PTR_TO_DYNPTR) parameter whose data points to the options > buffer to set. > > Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com> > > --- > v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function > v4: change API to be based on bpf_dynptr, suggested by John Fastabend <john.fastabend@gmail.com> > v6: Remove superfluous 'len' from bpf_skb_set_tunnel_opt_dynptr API > (rely on dynptr's internal size), suggested by Andrii Nakryiko <andrii.nakryiko@gmail.com> > --- > include/uapi/linux/bpf.h | 11 +++++++++++ > net/core/filter.c | 31 +++++++++++++++++++++++++++++-- > tools/include/uapi/linux/bpf.h | 11 +++++++++++ > 3 files changed, 51 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 3df78c56c1bf..ba12f7e1ccb6 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -5387,6 +5387,16 @@ union bpf_attr { > * Return > * Current *ktime*. > * > + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt) > + * Description > + * Set tunnel options metadata for the packet associated to *skb* > + * to the option data pointed to by the *opt* dynptr. > + * > + * See also the description of the **bpf_skb_get_tunnel_opt**\ () > + * helper for additional information. > + * Return > + * 0 on success, or a negative error in case of failure. > + * > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5598,6 +5608,7 @@ union bpf_attr { > FN(tcp_raw_check_syncookie_ipv4), \ > FN(tcp_raw_check_syncookie_ipv6), \ > FN(ktime_get_tai_ns), \ > + FN(skb_set_tunnel_opt_dynptr), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/net/core/filter.c b/net/core/filter.c > index e872f45399b0..1c652936ef86 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -4674,8 +4674,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { > .arg4_type = ARG_ANYTHING, > }; > > -BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, > - const u8 *, from, u32, size) > +static u64 __bpf_skb_set_tunopt(struct sk_buff *skb, const u8 *from, u32 size) > { > struct ip_tunnel_info *info = skb_tunnel_info(skb); > const struct metadata_dst *md = this_cpu_ptr(md_dst); > @@ -4690,6 +4689,22 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, > return 0; > } > > +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, > + const u8 *, from, u32, size) > +{ > + return __bpf_skb_set_tunopt(skb, from, size); > +} > + > +BPF_CALL_2(bpf_skb_set_tunnel_opt_dynptr, struct sk_buff *, skb, > + struct bpf_dynptr_kern *, ptr) > +{ > + const u8 *from = bpf_dynptr_get_data(ptr); > + > + if (unlikely(!from)) > + return -EFAULT; > + return __bpf_skb_set_tunopt(skb, from, bpf_dynptr_get_size(ptr)); > +} > + > static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { > .func = bpf_skb_set_tunnel_opt, > .gpl_only = false, > @@ -4699,6 +4714,14 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { > .arg3_type = ARG_CONST_SIZE, > }; > > +static const struct bpf_func_proto bpf_skb_set_tunnel_opt_dynptr_proto = { > + .func = bpf_skb_set_tunnel_opt_dynptr, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL, being able to accept only DYNPTR_TYPE_LOCAL is quite limiting. We have RINGBUF type, as well as soon we'll have MALLOC type. Even with SKB and XDP types there is a linear area that kernel accept directly. So if feels better to not specify exact type, accept any type, and then have internal kernel helpers that will return you direct memory pointer, if it is readable (i.e., for skb/xdp that would mean data points to linear part). So basically exactly the same behavior as bpf_dynptr_data() BPF helper. Also note that dynptr is now CAP_BPF-only, so you might want to make sure that your new helper is also CAP_BPF-guarded? > +}; > + > static const struct bpf_func_proto * > bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) > { > @@ -4719,6 +4742,8 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) > return &bpf_skb_set_tunnel_key_proto; > case BPF_FUNC_skb_set_tunnel_opt: > return &bpf_skb_set_tunnel_opt_proto; > + case BPF_FUNC_skb_set_tunnel_opt_dynptr: > + return &bpf_skb_set_tunnel_opt_dynptr_proto; > default: > return NULL; > } > @@ -7798,6 +7823,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > case BPF_FUNC_skb_get_tunnel_opt: > return &bpf_skb_get_tunnel_opt_proto; > case BPF_FUNC_skb_set_tunnel_opt: > + case BPF_FUNC_skb_set_tunnel_opt_dynptr: > return bpf_get_skb_set_tunnel_proto(func_id); > case BPF_FUNC_redirect: > return &bpf_redirect_proto; > @@ -8145,6 +8171,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > case BPF_FUNC_skb_get_tunnel_opt: > return &bpf_skb_get_tunnel_opt_proto; > case BPF_FUNC_skb_set_tunnel_opt: > + case BPF_FUNC_skb_set_tunnel_opt_dynptr: > return bpf_get_skb_set_tunnel_proto(func_id); > case BPF_FUNC_redirect: > return &bpf_redirect_proto; > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 3df78c56c1bf..ba12f7e1ccb6 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -5387,6 +5387,16 @@ union bpf_attr { > * Return > * Current *ktime*. > * > + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt) > + * Description > + * Set tunnel options metadata for the packet associated to *skb* > + * to the option data pointed to by the *opt* dynptr. > + * > + * See also the description of the **bpf_skb_get_tunnel_opt**\ () > + * helper for additional information. > + * Return > + * 0 on success, or a negative error in case of failure. > + * > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5598,6 +5608,7 @@ union bpf_attr { > FN(tcp_raw_check_syncookie_ipv4), \ > FN(tcp_raw_check_syncookie_ipv6), \ > FN(ktime_get_tai_ns), \ > + FN(skb_set_tunnel_opt_dynptr), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > -- > 2.37.3 >
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3df78c56c1bf..ba12f7e1ccb6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5387,6 +5387,16 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data pointed to by the *opt* dynptr. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5598,6 +5608,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(skb_set_tunnel_opt_dynptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index e872f45399b0..1c652936ef86 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4674,8 +4674,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, - const u8 *, from, u32, size) +static u64 __bpf_skb_set_tunopt(struct sk_buff *skb, const u8 *from, u32 size) { struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); @@ -4690,6 +4689,22 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, return 0; } +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, + const u8 *, from, u32, size) +{ + return __bpf_skb_set_tunopt(skb, from, size); +} + +BPF_CALL_2(bpf_skb_set_tunnel_opt_dynptr, struct sk_buff *, skb, + struct bpf_dynptr_kern *, ptr) +{ + const u8 *from = bpf_dynptr_get_data(ptr); + + if (unlikely(!from)) + return -EFAULT; + return __bpf_skb_set_tunopt(skb, from, bpf_dynptr_get_size(ptr)); +} + static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .func = bpf_skb_set_tunnel_opt, .gpl_only = false, @@ -4699,6 +4714,14 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .arg3_type = ARG_CONST_SIZE, }; +static const struct bpf_func_proto bpf_skb_set_tunnel_opt_dynptr_proto = { + .func = bpf_skb_set_tunnel_opt_dynptr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL, +}; + static const struct bpf_func_proto * bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) { @@ -4719,6 +4742,8 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) return &bpf_skb_set_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_opt: return &bpf_skb_set_tunnel_opt_proto; + case BPF_FUNC_skb_set_tunnel_opt_dynptr: + return &bpf_skb_set_tunnel_opt_dynptr_proto; default: return NULL; } @@ -7798,6 +7823,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_get_tunnel_opt: return &bpf_skb_get_tunnel_opt_proto; case BPF_FUNC_skb_set_tunnel_opt: + case BPF_FUNC_skb_set_tunnel_opt_dynptr: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; @@ -8145,6 +8171,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_get_tunnel_opt: return &bpf_skb_get_tunnel_opt_proto; case BPF_FUNC_skb_set_tunnel_opt: + case BPF_FUNC_skb_set_tunnel_opt_dynptr: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3df78c56c1bf..ba12f7e1ccb6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5387,6 +5387,16 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data pointed to by the *opt* dynptr. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5598,6 +5608,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(skb_set_tunnel_opt_dynptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper
Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given an option buffer (ARG_PTR_TO_MEM) and the compile-time fixed buffer size (ARG_CONST_SIZE). However, in certain cases we wish to set tunnel options of dynamic length. For example, we have an ebpf program that gets geneve options on incoming packets, stores them into a map (using a key representing the incoming flow), and later needs to assign *same* options to reply packets (belonging to same flow). This is currently imposssible without knowing sender's exact geneve options length, which unfortunately is dymamic. Introduce 'bpf_skb_set_tunnel_opt_dynptr'. This is a variant of 'bpf_skb_set_tunnel_opt' which gets a bpf dynamic pointer (ARG_PTR_TO_DYNPTR) parameter whose data points to the options buffer to set. Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com> --- v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function v4: change API to be based on bpf_dynptr, suggested by John Fastabend <john.fastabend@gmail.com> v6: Remove superfluous 'len' from bpf_skb_set_tunnel_opt_dynptr API (rely on dynptr's internal size), suggested by Andrii Nakryiko <andrii.nakryiko@gmail.com> --- include/uapi/linux/bpf.h | 11 +++++++++++ net/core/filter.c | 31 +++++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 11 +++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-)