Message ID | 20250305-afabre-traits-010-rfc2-v1-2-d0ecfb869797@cloudflare.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | BPF |
Headers | show |
Series | traits: Per packet metadata KV store | expand |
On Mar 05, arthur@arthurfabre.com wrote: > From: Arthur Fabre <afabre@cloudflare.com> > [...] > +static __always_inline void *xdp_buff_traits(const struct xdp_buff *xdp) > +{ > + return xdp->data_hard_start + _XDP_FRAME_SIZE; > +} > + > static __always_inline void > xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) > { > @@ -133,6 +139,13 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, > xdp->data = data; > xdp->data_end = data + data_len; > xdp->data_meta = meta_valid ? data : data + 1; > + > + if (meta_valid) { can we relax this constraint and use xdp->data as end boundary here? > + /* We assume drivers reserve enough headroom to store xdp_frame > + * and the traits header. > + */ > + traits_init(xdp_buff_traits(xdp), xdp->data_meta); > + } > } > > /* Reserve memory area at end-of data area. > @@ -267,6 +280,8 @@ struct xdp_frame { > u32 flags; /* supported values defined in xdp_buff_flags */ > }; > > +static_assert(sizeof(struct xdp_frame) == _XDP_FRAME_SIZE); > + > static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) > { > return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); > @@ -517,6 +532,11 @@ static inline bool xdp_metalen_invalid(unsigned long metalen) > return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; > } > > +static __always_inline void *xdp_meta_hard_start(const struct xdp_buff *xdp) > +{ > + return xdp_buff_traits(xdp) + traits_size(xdp_buff_traits(xdp)); here we are always consuming sizeof(struct __trait_hdr), right? Can we do something smarter and check if traits are really used? (e.g. adding in the flags in xdp_buff)? 
> +} > + > struct xdp_attachment_info { > struct bpf_prog *prog; > u32 flags; > diff --git a/net/core/filter.c b/net/core/filter.c > index dcc53ac5c5458f67a422453134665d43d466a02e..79b78e7cd57fd78c6cc8443da54ae96408c496b0 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -85,6 +85,7 @@ > #include <linux/un.h> > #include <net/xdp_sock_drv.h> > #include <net/inet_dscp.h> > +#include <net/trait.h> > > #include "dev.h" > > @@ -3935,9 +3936,8 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) > > BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > { > - void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); > unsigned long metalen = xdp_get_metalen(xdp); > - void *data_start = xdp_frame_end + metalen; > + void *data_start = xdp_meta_hard_start(xdp) + metalen; We could waste 16 bytes here, right? Regards, Lorenzo > void *data = xdp->data + offset; > > if (unlikely(data < data_start || > @@ -4228,13 +4228,12 @@ static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { > > BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) > { > - void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); > void *meta = xdp->data_meta + offset; > unsigned long metalen = xdp->data - meta; > > if (xdp_data_meta_unsupported(xdp)) > return -ENOTSUPP; > - if (unlikely(meta < xdp_frame_end || > + if (unlikely(meta < xdp_meta_hard_start(xdp) || > meta > xdp->data)) > return -EINVAL; > if (unlikely(xdp_metalen_invalid(metalen))) > diff --git a/net/core/xdp.c b/net/core/xdp.c > index 2c6ab6fb452f7b90d85125ae17fef96cfc9a8576..2e87f82aa5f835f60295d859a524e40bd47c42ee 100644 > --- a/net/core/xdp.c > +++ b/net/core/xdp.c > @@ -1032,3 +1032,53 @@ void xdp_features_clear_redirect_target(struct net_device *dev) > xdp_set_features_flag(dev, val); > } > EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); > + > +__bpf_kfunc_start_defs(); > + > +__bpf_kfunc int bpf_xdp_trait_set(const struct xdp_buff *xdp, 
u64 key, > + const void *val, u64 val__sz, u64 flags) > +{ > + if (xdp_data_meta_unsupported(xdp)) > + return -EOPNOTSUPP; > + > + return trait_set(xdp_buff_traits(xdp), xdp->data_meta, key, > + val, val__sz, flags); > +} > + > +__bpf_kfunc int bpf_xdp_trait_get(const struct xdp_buff *xdp, u64 key, > + void *val, u64 val__sz) > +{ > + if (xdp_data_meta_unsupported(xdp)) > + return -EOPNOTSUPP; > + > + return trait_get(xdp_buff_traits(xdp), key, val, val__sz); > +} > + > +__bpf_kfunc int bpf_xdp_trait_del(const struct xdp_buff *xdp, u64 key) > +{ > + if (xdp_data_meta_unsupported(xdp)) > + return -EOPNOTSUPP; > + > + return trait_del(xdp_buff_traits(xdp), key); > +} > + > +__bpf_kfunc_end_defs(); > + > +BTF_KFUNCS_START(xdp_trait) > +// TODO - should we use KF_TRUSTED_ARGS? https://www.kernel.org/doc/html/next/bpf/kfuncs.html#kf-trusted-args-flag > +BTF_ID_FLAGS(func, bpf_xdp_trait_set) > +BTF_ID_FLAGS(func, bpf_xdp_trait_get) > +BTF_ID_FLAGS(func, bpf_xdp_trait_del) > +BTF_KFUNCS_END(xdp_trait) > + > +static const struct btf_kfunc_id_set xdp_trait_kfunc_set = { > + .owner = THIS_MODULE, > + .set = &xdp_trait, > +}; > + > +static int xdp_trait_init(void) > +{ > + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, > + &xdp_trait_kfunc_set); > +} > +late_initcall(xdp_trait_init); > > -- > 2.43.0 > >
On Fri Mar 7, 2025 at 8:14 PM CET, Lorenzo Bianconi wrote: > On Mar 05, arthur@arthurfabre.com wrote: > > From: Arthur Fabre <afabre@cloudflare.com> > > > > [...] > > > +static __always_inline void *xdp_buff_traits(const struct xdp_buff *xdp) > > +{ > > + return xdp->data_hard_start + _XDP_FRAME_SIZE; > > +} > > + > > static __always_inline void > > xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) > > { > > @@ -133,6 +139,13 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, > > xdp->data = data; > > xdp->data_end = data + data_len; > > xdp->data_meta = meta_valid ? data : data + 1; > > + > > + if (meta_valid) { > > can we relax this constraint and use xdp->data as end boundary here? The problem isn't having a boundary, it's patching all the drivers to propagate that traits are present to the skb layer. See patch 8 "trait: Propagate presence of traits to sk_buff", and patches 9-15 for driver changes. There's some discussion around updating all the remaining drivers to support XDP metadata. If, instead of making them call skb_metadata_set(), we use a more "generic" hook like "xdp_buff_update_skb()" from this series, we can use it for traits later. > > > + /* We assume drivers reserve enough headroom to store xdp_frame > > + * and the traits header. > > + */ > > + traits_init(xdp_buff_traits(xdp), xdp->data_meta); > > + } > > } > > > > /* Reserve memory area at end-of data area. 
> > @@ -267,6 +280,8 @@ struct xdp_frame { > > u32 flags; /* supported values defined in xdp_buff_flags */ > > }; > > > > +static_assert(sizeof(struct xdp_frame) == _XDP_FRAME_SIZE); > > + > > static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) > > { > > return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); > > @@ -517,6 +532,11 @@ static inline bool xdp_metalen_invalid(unsigned long metalen) > > return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; > > } > > > > +static __always_inline void *xdp_meta_hard_start(const struct xdp_buff *xdp) > > +{ > > + return xdp_buff_traits(xdp) + traits_size(xdp_buff_traits(xdp)); > > here we are always consuming sizeof(struct __trait_hdr), right? We can do > something smarter and check if traits are really used? (e.g. adding in the flags > in xdp_buff)? Yes, we're always taking space from the headroom for struct __trait_hdr. I think it's impossible to tell if traits are used or not early enough: users could be setting a trait for the first time in iptables or TC. But we don't know that in XDP. > > > +} > > + > > struct xdp_attachment_info { > > struct bpf_prog *prog; > > u32 flags; > > diff --git a/net/core/filter.c b/net/core/filter.c > > index dcc53ac5c5458f67a422453134665d43d466a02e..79b78e7cd57fd78c6cc8443da54ae96408c496b0 100644 > > --- a/net/core/filter.c > > +++ b/net/core/filter.c > > @@ -85,6 +85,7 @@ > > #include <linux/un.h> > > #include <net/xdp_sock_drv.h> > > #include <net/inet_dscp.h> > > +#include <net/trait.h> > > > > #include "dev.h" > > > > @@ -3935,9 +3936,8 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) > > > > BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) > > { > > - void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); > > unsigned long metalen = xdp_get_metalen(xdp); > > - void *data_start = xdp_frame_end + metalen; > > + void *data_start = xdp_meta_hard_start(xdp) + metalen; > > We could waste 16 bytes here, right? 
If traits aren't being used? [...]
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bb2b751d274acff931281a72e8b4b0c699b4e8af..03553c2200ab1c3750b799edb94fa3b94e11a5f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -274,6 +274,9 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +/* From xdp.h, to avoid indirectly including skbuff.h */ +#define _XDP_FRAME_SIZE (40) + struct ahash_request; struct net_device; struct scatterlist; diff --git a/include/net/xdp.h b/include/net/xdp.h index 4dafc5e021f13688f0bf69a21bff58d394d1ac28..58019fa299b56dbd45c104fdfa807f73af6e4fa4 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -10,6 +10,7 @@ #include <linux/filter.h> #include <linux/netdevice.h> #include <linux/skbuff.h> /* skb_shared_info */ +#include <net/trait.h> #include <net/page_pool/types.h> @@ -115,6 +116,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; } +static __always_inline void *xdp_buff_traits(const struct xdp_buff *xdp) +{ + return xdp->data_hard_start + _XDP_FRAME_SIZE; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { @@ -133,6 +139,13 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data = data; xdp->data_end = data + data_len; xdp->data_meta = meta_valid ? data : data + 1; + + if (meta_valid) { + /* We assume drivers reserve enough headroom to store xdp_frame + * and the traits header. + */ + traits_init(xdp_buff_traits(xdp), xdp->data_meta); + } } /* Reserve memory area at end-of data area. 
@@ -267,6 +280,8 @@ struct xdp_frame { u32 flags; /* supported values defined in xdp_buff_flags */ }; +static_assert(sizeof(struct xdp_frame) == _XDP_FRAME_SIZE); + static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) { return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); @@ -517,6 +532,11 @@ static inline bool xdp_metalen_invalid(unsigned long metalen) return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; } +static __always_inline void *xdp_meta_hard_start(const struct xdp_buff *xdp) +{ + return xdp_buff_traits(xdp) + traits_size(xdp_buff_traits(xdp)); +} + struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; diff --git a/net/core/filter.c b/net/core/filter.c index dcc53ac5c5458f67a422453134665d43d466a02e..79b78e7cd57fd78c6cc8443da54ae96408c496b0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -85,6 +85,7 @@ #include <linux/un.h> #include <net/xdp_sock_drv.h> #include <net/inet_dscp.h> +#include <net/trait.h> #include "dev.h" @@ -3935,9 +3936,8 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { - void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); unsigned long metalen = xdp_get_metalen(xdp); - void *data_start = xdp_frame_end + metalen; + void *data_start = xdp_meta_hard_start(xdp) + metalen; void *data = xdp->data + offset; if (unlikely(data < data_start || @@ -4228,13 +4228,12 @@ static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) { - void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); void *meta = xdp->data_meta + offset; unsigned long metalen = xdp->data - meta; if (xdp_data_meta_unsupported(xdp)) return -ENOTSUPP; - if (unlikely(meta < xdp_frame_end || + if (unlikely(meta < xdp_meta_hard_start(xdp) || meta > xdp->data)) return -EINVAL; if (unlikely(xdp_metalen_invalid(metalen))) diff --git 
a/net/core/xdp.c b/net/core/xdp.c index 2c6ab6fb452f7b90d85125ae17fef96cfc9a8576..2e87f82aa5f835f60295d859a524e40bd47c42ee 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -1032,3 +1032,53 @@ void xdp_features_clear_redirect_target(struct net_device *dev) xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_xdp_trait_set(const struct xdp_buff *xdp, u64 key, + const void *val, u64 val__sz, u64 flags) +{ + if (xdp_data_meta_unsupported(xdp)) + return -EOPNOTSUPP; + + return trait_set(xdp_buff_traits(xdp), xdp->data_meta, key, + val, val__sz, flags); +} + +__bpf_kfunc int bpf_xdp_trait_get(const struct xdp_buff *xdp, u64 key, + void *val, u64 val__sz) +{ + if (xdp_data_meta_unsupported(xdp)) + return -EOPNOTSUPP; + + return trait_get(xdp_buff_traits(xdp), key, val, val__sz); +} + +__bpf_kfunc int bpf_xdp_trait_del(const struct xdp_buff *xdp, u64 key) +{ + if (xdp_data_meta_unsupported(xdp)) + return -EOPNOTSUPP; + + return trait_del(xdp_buff_traits(xdp), key); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(xdp_trait) +// TODO - should we use KF_TRUSTED_ARGS? https://www.kernel.org/doc/html/next/bpf/kfuncs.html#kf-trusted-args-flag +BTF_ID_FLAGS(func, bpf_xdp_trait_set) +BTF_ID_FLAGS(func, bpf_xdp_trait_get) +BTF_ID_FLAGS(func, bpf_xdp_trait_del) +BTF_KFUNCS_END(xdp_trait) + +static const struct btf_kfunc_id_set xdp_trait_kfunc_set = { + .owner = THIS_MODULE, + .set = &xdp_trait, +}; + +static int xdp_trait_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &xdp_trait_kfunc_set); +} +late_initcall(xdp_trait_init);