Message ID | 20230512152607.992209-15-larysa.zaremba@intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | new kfunc XDP hints and ice implementation | expand |
On 12/05/2023 17.26, Larysa Zaremba wrote: > From: Aleksander Lobakin <aleksander.lobakin@intel.com> > > When using XDP hints, metadata sometimes has to be much bigger > than 32 bytes. Relax the restriction, allow metadata larger than 32 bytes > and make __skb_metadata_differs() work with bigger lengths. > > Now size of metadata is only limited by the fact it is stored as u8 > in skb_shared_info, so maximum possible value is 255. I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". The maximum possible size is limited by the XDP headroom, which is also shared/limited with/by xdp_frame. I must be reading the sentence wrong, somehow. > Other important > conditions, such as having enough space for xdp_frame building, are already > checked in bpf_xdp_adjust_meta(). > > The requirement of having its length aligned to 4 bytes is still > valid. > > Signed-off-by: Aleksander Lobakin <aleksander.lobakin@intel.com> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com> > --- > include/linux/skbuff.h | 13 ++++++++----- > include/net/xdp.h | 7 ++++++- > 2 files changed, 14 insertions(+), 6 deletions(-) > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index 8ddb4af1a501..afcd372aecdf 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -4219,10 +4219,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, > { > const void *a = skb_metadata_end(skb_a); > const void *b = skb_metadata_end(skb_b); > - /* Using more efficient varaiant than plain call to memcmp(). */ > -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 > u64 diffs = 0; > > + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || > + BITS_PER_LONG != 64) > + goto slow; > + > + /* Using more efficient variant than plain call to memcmp(). */ > switch (meta_len) { > #define __it(x, op) (x -= sizeof(u##op)) > #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) > @@ -4242,11 +4245,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, > fallthrough; > case 4: diffs |= __it_diff(a, b, 32); > break; > + default: > +slow: > + return memcmp(a - meta_len, b - meta_len, meta_len); > } > return diffs; > -#else > - return memcmp(a - meta_len, b - meta_len, meta_len); > -#endif > } > > static inline bool skb_metadata_differs(const struct sk_buff *skb_a, > diff --git a/include/net/xdp.h b/include/net/xdp.h > index 0fbd25616241..f48723250c7c 100644 > --- a/include/net/xdp.h > +++ b/include/net/xdp.h > @@ -370,7 +370,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) > > static inline bool xdp_metalen_invalid(unsigned long metalen) > { > - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); > + typeof(metalen) meta_max; > + > + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); > + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); > + > + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; > } > > struct xdp_attachment_info {
On Mon, May 15, 2023 at 06:17:02PM +0200, Jesper Dangaard Brouer wrote: > > > On 12/05/2023 17.26, Larysa Zaremba wrote: > > From: Aleksander Lobakin <aleksander.lobakin@intel.com> > > > > When using XDP hints, metadata sometimes has to be much bigger > > than 32 bytes. Relax the restriction, allow metadata larger than 32 bytes > > and make __skb_metadata_differs() work with bigger lengths. > > > > Now size of metadata is only limited by the fact it is stored as u8 > > in skb_shared_info, so maximum possible value is 255. > > I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". > The maximum possible size is limited by the XDP headroom, which is also > shared/limited with/by xdp_frame. I must be reading the sentence wrong, > somehow. It's not 'metadata is stored as u8', it's 'metadata size is stored as u8' :) Maybe I should rephrase it better in v2. > > > Other important > > conditions, such as having enough space for xdp_frame building, are already > > checked in bpf_xdp_adjust_meta(). > > > > The requirement of having its length aligned to 4 bytes is still > > valid. > > > > Signed-off-by: Aleksander Lobakin <aleksander.lobakin@intel.com> > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com> > > --- > > include/linux/skbuff.h | 13 ++++++++----- > > include/net/xdp.h | 7 ++++++- > > 2 files changed, 14 insertions(+), 6 deletions(-) > > > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > > index 8ddb4af1a501..afcd372aecdf 100644 > > --- a/include/linux/skbuff.h > > +++ b/include/linux/skbuff.h > > @@ -4219,10 +4219,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, > > { > > const void *a = skb_metadata_end(skb_a); > > const void *b = skb_metadata_end(skb_b); > > - /* Using more efficient varaiant than plain call to memcmp(). */ > > -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 > > u64 diffs = 0; > > + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || > > + BITS_PER_LONG != 64) > > + goto slow; > > + > > + /* Using more efficient variant than plain call to memcmp(). */ > > switch (meta_len) { > > #define __it(x, op) (x -= sizeof(u##op)) > > #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) > > @@ -4242,11 +4245,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, > > fallthrough; > > case 4: diffs |= __it_diff(a, b, 32); > > break; > > + default: > > +slow: > > + return memcmp(a - meta_len, b - meta_len, meta_len); > > } > > return diffs; > > -#else > > - return memcmp(a - meta_len, b - meta_len, meta_len); > > -#endif > > } > > static inline bool skb_metadata_differs(const struct sk_buff *skb_a, > > diff --git a/include/net/xdp.h b/include/net/xdp.h > > index 0fbd25616241..f48723250c7c 100644 > > --- a/include/net/xdp.h > > +++ b/include/net/xdp.h > > @@ -370,7 +370,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) > > static inline bool xdp_metalen_invalid(unsigned long metalen) > > { > > - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); > > + typeof(metalen) meta_max; > > + > > + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); > > + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); > > + > > + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; > > } > > struct xdp_attachment_info { > >
From: Larysa Zaremba <larysa.zaremba@intel.com> Date: Mon, 15 May 2023 19:08:39 +0200 > On Mon, May 15, 2023 at 06:17:02PM +0200, Jesper Dangaard Brouer wrote: >> >> >> On 12/05/2023 17.26, Larysa Zaremba wrote: >>> From: Aleksander Lobakin <aleksander.lobakin@intel.com> >>> >>> When using XDP hints, metadata sometimes has to be much bigger >>> than 32 bytes. Relax the restriction, allow metadata larger than 32 bytes >>> and make __skb_metadata_differs() work with bigger lengths. >>> >>> Now size of metadata is only limited by the fact it is stored as u8 >>> in skb_shared_info, so maximum possible value is 255. >> >> I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". >> The maximum possible size is limited by the XDP headroom, which is also >> shared/limited with/by xdp_frame. I must be reading the sentence wrong, >> somehow. skb_shared_info::meta_size is u8. Since metadata gets carried from xdp_buff to skb, this check is needed (it's compile-time constant anyway). Check for headroom is done separately already (two sentences below). > > It's not 'metadata is stored as u8', it's 'metadata size is stored as u8' :) > Maybe I should rephrase it better in v2. > >> >>> Other important >>> conditions, such as having enough space for xdp_frame building, are already >>> checked in bpf_xdp_adjust_meta(). >>> >>> The requirement of having its length aligned to 4 bytes is still >>> valid. BTW I decided to not expand switch-case in __skb_metadata_differs() with more size values because: 1) it's not a common case; 2) memcmp() is +/- fast on x86; 3) it's gross already. But I can if needed :D I think it can be compressed via some macro hell. (this function is called for each skb when GROing if it carries any meta, so sometimes may hurt. Larysa, have you noticed any perf regression between meta <= 32 and > 32?) Thanks, Olek
On 16/05/2023 14.37, Alexander Lobakin wrote: > From: Larysa Zaremba<larysa.zaremba@intel.com> > Date: Mon, 15 May 2023 19:08:39 +0200 > >> On Mon, May 15, 2023 at 06:17:02PM +0200, Jesper Dangaard Brouer wrote: >>> >>> On 12/05/2023 17.26, Larysa Zaremba wrote: >>>> From: Aleksander Lobakin<aleksander.lobakin@intel.com> >>>> >>>> When using XDP hints, metadata sometimes has to be much bigger >>>> than 32 bytes. Relax the restriction, allow metadata larger than 32 bytes >>>> and make __skb_metadata_differs() work with bigger lengths. >>>> >>>> Now size of metadata is only limited by the fact it is stored as u8 >>>> in skb_shared_info, so maximum possible value is 255. >>> >>> I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". >>> The maximum possible size is limited by the XDP headroom, which is also >>> shared/limited with/by xdp_frame. I must be reading the sentence wrong, >>> somehow. > > skb_shared_info::meta_size is u8. Since metadata gets carried from > xdp_buff to skb, this check is needed (it's compile-time constant anyway). > Check for headroom is done separately already (two sentences below). > Damn, argh, for SKBs the "meta_len" is stored in skb_shared_info, which is located on another cacheline. That is a sure way to KILL performance! :-( But only use for SKBs that gets created from xdp with metadata, right? >> It's not 'metadata is stored as u8', it's 'metadata size is stored as u8' :) >> Maybe I should rephrase it better in v2. Yes, a rephrase will be good. --Jesper static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; }
From: Jesper Dangaard Brouer <jbrouer@redhat.com> Date: Tue, 16 May 2023 17:35:27 +0200 > > > On 16/05/2023 14.37, Alexander Lobakin wrote: >> From: Larysa Zaremba<larysa.zaremba@intel.com> >> Date: Mon, 15 May 2023 19:08:39 +0200 >> >>> On Mon, May 15, 2023 at 06:17:02PM +0200, Jesper Dangaard Brouer wrote: >>>> >>>> On 12/05/2023 17.26, Larysa Zaremba wrote: >>>>> From: Aleksander Lobakin<aleksander.lobakin@intel.com> >>>>> >>>>> When using XDP hints, metadata sometimes has to be much bigger >>>>> than 32 bytes. Relax the restriction, allow metadata larger than 32 >>>>> bytes >>>>> and make __skb_metadata_differs() work with bigger lengths. >>>>> >>>>> Now size of metadata is only limited by the fact it is stored as u8 >>>>> in skb_shared_info, so maximum possible value is 255. >>>> >>>> I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". >>>> The maximum possible size is limited by the XDP headroom, which is also >>>> shared/limited with/by xdp_frame. I must be reading the sentence >>>> wrong, >>>> somehow. >> >> skb_shared_info::meta_size is u8. Since metadata gets carried from >> xdp_buff to skb, this check is needed (it's compile-time constant >> anyway). >> Check for headroom is done separately already (two sentences below). >> > > Damn, argh, for SKBs the "meta_len" is stored in skb_shared_info, which > is located on another cacheline. > That is a sure way to KILL performance! :-( Have you read the code? I use type_max(typeof_member(shinfo, meta_len)), what performance are you talking about? The whole xdp_metalen_invalid() gets expanded into: return (metalen % 4) || metalen > 255; at compile-time. All those typeof shenanigans are only to not open-code meta_len's type/size/max. > > But only use for SKBs that gets created from xdp with metadata, right? > > > >>> It's not 'metadata is stored as u8', it's 'metadata size is stored as >>> u8' :) >>> Maybe I should rephrase it better in v2. > > Yes, a rephrase will be good. > > --Jesper > > > > static inline u8 skb_metadata_len(const struct sk_buff *skb) > { > return skb_shinfo(skb)->meta_len; > } > Thanks, Olek
On 19/05/2023 18.35, Alexander Lobakin wrote: > From: Jesper Dangaard Brouer <jbrouer@redhat.com> > Date: Tue, 16 May 2023 17:35:27 +0200 > >> >> On 16/05/2023 14.37, Alexander Lobakin wrote: >>> From: Larysa Zaremba<larysa.zaremba@intel.com> >>> Date: Mon, 15 May 2023 19:08:39 +0200 >>> >>>> On Mon, May 15, 2023 at 06:17:02PM +0200, Jesper Dangaard Brouer wrote: >>>>> >>>>> On 12/05/2023 17.26, Larysa Zaremba wrote: >>>>>> From: Aleksander Lobakin<aleksander.lobakin@intel.com> >>>>>> >>>>>> When using XDP hints, metadata sometimes has to be much bigger >>>>>> than 32 bytes. Relax the restriction, allow metadata larger than 32 >>>>>> bytes >>>>>> and make __skb_metadata_differs() work with bigger lengths. >>>>>> >>>>>> Now size of metadata is only limited by the fact it is stored as u8 >>>>>> in skb_shared_info, so maximum possible value is 255. >>>>> >>>>> I'm confused, IIRC the metadata area isn't stored "in skb_shared_info". >>>>> The maximum possible size is limited by the XDP headroom, which is also >>>>> shared/limited with/by xdp_frame. I must be reading the sentence >>>>> wrong, >>>>> somehow. >>> >>> skb_shared_info::meta_size is u8. Since metadata gets carried from >>> xdp_buff to skb, this check is needed (it's compile-time constant >>> anyway). >>> Check for headroom is done separately already (two sentences below). >>> >> >> Damn, argh, for SKBs the "meta_len" is stored in skb_shared_info, which >> is located on another cacheline. >> That is a sure way to KILL performance! :-( > > Have you read the code? I use type_max(typeof_member(shinfo, meta_len)), > what performance are you talking about? > Not talking about your changes (in this patch). I'm realizing that SKBs using metadata area will have a performance hit due to accessing another cacheline (the meta_len in skb_shared_info). IIRC Daniel complained about this performance hit (in the past), I guess this explains it. IIRC Cilium changed to use percpu variables/datastore to workaround this. > The whole xdp_metalen_invalid() gets expanded into: > > return (metalen % 4) || metalen > 255; > > at compile-time. All those typeof shenanigans are only to not open-code > meta_len's type/size/max. > >> >> But only use for SKBs that gets created from xdp with metadata, right? >> Normal netstack processing actually access this skb_shinfo->meta_len in gro_list_prepare(). As the caller dev_gro_receive() later access other memory in skb_shared_info, then the GRO code path already takes this hit to begin with. --Jesper
From: Jesper Dangaard Brouer <jbrouer@redhat.com> Date: Mon, 22 May 2023 13:41:43 +0200 > > > On 19/05/2023 18.35, Alexander Lobakin wrote: >> From: Jesper Dangaard Brouer <jbrouer@redhat.com> >> Date: Tue, 16 May 2023 17:35:27 +0200 [...] > Not talking about your changes (in this patch). > > I'm realizing that SKBs using metadata area will have a performance hit > due to accessing another cacheline (the meta_len in skb_shared_info). > > IIRC Daniel complained about this performance hit (in the past), I guess > this explains it. IIRC Cilium changed to use percpu variables/datastore > to workaround this. Why should we compare metadata of skbs on GRO anyway? I was disabling it the old hints series (conditionally, if driver asks), moreover... ...if metadata contains full checksum, GRO will be broken completely due to this comparison (or any other frame-unique fields. VLAN tags and hashes are okay). > > >> The whole xdp_metalen_invalid() gets expanded into: >> >> return (metalen % 4) || metalen > 255; >> >> at compile-time. All those typeof shenanigans are only to not open-code >> meta_len's type/size/max. >> >>> >>> But only use for SKBs that gets created from xdp with metadata, right? >>> > > Normal netstack processing actually access this skb_shinfo->meta_len in > gro_list_prepare(). As the caller dev_gro_receive() later access other > memory in skb_shared_info, then the GRO code path already takes this hit > to begin with. You access skb_shinfo() often even before running XDP program, for example, when a frame is multi-buffer. Plus HW timestamps are also there, and so on. > > --Jesper > Thanks, Olek
On 5/22/23 5:28 PM, Alexander Lobakin wrote: > From: Jesper Dangaard Brouer <jbrouer@redhat.com> > Date: Mon, 22 May 2023 13:41:43 +0200 >> On 19/05/2023 18.35, Alexander Lobakin wrote: >>> From: Jesper Dangaard Brouer <jbrouer@redhat.com> >>> Date: Tue, 16 May 2023 17:35:27 +0200 > > [...] > >> Not talking about your changes (in this patch). >> >> I'm realizing that SKBs using metadata area will have a performance hit >> due to accessing another cacheline (the meta_len in skb_shared_info). >> >> IIRC Daniel complained about this performance hit (in the past), I guess >> this explains it. IIRC Cilium changed to use percpu variables/datastore >> to workaround this. > > Why should we compare metadata of skbs on GRO anyway? I was disabling it > the old hints series (conditionally, if driver asks), moreover... > ...if metadata contains full checksum, GRO will be broken completely due > to this comparison (or any other frame-unique fields. VLAN tags and > hashes are okay). This is when BPF prog on XDP populates metadata with custom data when it wants to transfer information from XDP to skb aka tc BPF prog side. percpu data store may not work here as it is not guaranteed that skb might end up on same CPU. >>> The whole xdp_metalen_invalid() gets expanded into: >>> >>> return (metalen % 4) || metalen > 255; >>> >>> at compile-time. All those typeof shenanigans are only to not open-code >>> meta_len's type/size/max. >>> >>>> >>>> But only use for SKBs that gets created from xdp with metadata, right? >>>> >> >> Normal netstack processing actually access this skb_shinfo->meta_len in >> gro_list_prepare(). As the caller dev_gro_receive() later access other >> memory in skb_shared_info, then the GRO code path already takes this hit >> to begin with. > > You access skb_shinfo() often even before running XDP program, for > example, when a frame is multi-buffer. Plus HW timestamps are also > there, and so on.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8ddb4af1a501..afcd372aecdf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4219,10 +4219,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient varaiant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4242,11 +4245,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, diff --git a/include/net/xdp.h b/include/net/xdp.h index 0fbd25616241..f48723250c7c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -370,7 +370,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) static inline bool xdp_metalen_invalid(unsigned long metalen) { - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); + typeof(metalen) meta_max; + + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); + + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; } struct xdp_attachment_info {