Message ID | 863f4934d251f44ad85a6be08b3737fac74f9b5a.1623674025.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | mvneta: introduce XDP multi-buffer support | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | warning | 6 maintainers not CCed: yhs@fb.com kpsingh@kernel.org hawk@kernel.org andrii@kernel.org kafai@fb.com songliubraving@fb.com |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 5151 this patch: 5151 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 109 lines checked |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 5216 this patch: 5216 |
netdev/header_inline | success | Link |
Lorenzo Bianconi wrote: > From: Eelco Chaudron <echaudro@redhat.com> > > This change adds support for tail growing and shrinking for XDP multi-buff. > It would be nice if the commit message gave us some details on how the growing/shrinking works in the multi-buff support. > Signed-off-by: Eelco Chaudron <echaudro@redhat.com> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- > include/net/xdp.h | 7 ++++++ > net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ > net/core/xdp.c | 5 ++-- > 3 files changed, 72 insertions(+), 2 deletions(-) > > diff --git a/include/net/xdp.h b/include/net/xdp.h > index 935a6f83115f..3525801c6ed5 100644 > --- a/include/net/xdp.h > +++ b/include/net/xdp.h > @@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) > return (struct skb_shared_info *)xdp_data_hard_end(xdp); > } > > +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag) > +{ > + return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag); > +} > + > struct xdp_frame { > void *data; > u16 len; > @@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) > return xdp_frame; > } > > +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, > + struct xdp_buff *xdp); > void xdp_return_frame(struct xdp_frame *xdpf); > void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); > void xdp_return_buff(struct xdp_buff *xdp); > diff --git a/net/core/filter.c b/net/core/filter.c > index caa88955562e..05f574a3d690 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { > .arg2_type = ARG_ANYTHING, > }; > > +static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset) > +{ > + struct skb_shared_info *sinfo; > + > + if (unlikely(!xdp_buff_is_mb(xdp))) > + return -EINVAL; > + > + sinfo = xdp_get_shared_info_from_buff(xdp); > + if (offset >= 0) { > + skb_frag_t *frag = 
&sinfo->frags[sinfo->nr_frags - 1]; > + int size; > + > + if (unlikely(offset > xdp_get_frag_tailroom(frag))) > + return -EINVAL; > + > + size = skb_frag_size(frag); > + memset(skb_frag_address(frag) + size, 0, offset); > + skb_frag_size_set(frag, size + offset); > + sinfo->data_len += offset; Can you add some comment on how this works? So today I call bpf_xdp_adjust_tail() to add some trailer to my packet. This looks like it adds tailroom to the last frag? But, then how do I insert my trailer? I don't think we can without the extra multi-buffer access support right. Also data_end will be unchanged yet it will return 0 so my current programs will likely be a bit confused by this. > + } else {
On 23 Jun 2021, at 1:37, John Fastabend wrote: > Lorenzo Bianconi wrote: >> From: Eelco Chaudron <echaudro@redhat.com> >> >> This change adds support for tail growing and shrinking for XDP multi-buff. >> > > It would be nice if the commit message gave us some details on how the > growing/shrinking works in the multi-buff support. Will add this to the next rev. >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com> >> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> >> --- >> include/net/xdp.h | 7 ++++++ >> net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ >> net/core/xdp.c | 5 ++-- >> 3 files changed, 72 insertions(+), 2 deletions(-) >> >> diff --git a/include/net/xdp.h b/include/net/xdp.h >> index 935a6f83115f..3525801c6ed5 100644 >> --- a/include/net/xdp.h >> +++ b/include/net/xdp.h >> @@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) >> return (struct skb_shared_info *)xdp_data_hard_end(xdp); >> } >> >> +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag) >> +{ >> + return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag); >> +} >> + >> struct xdp_frame { >> void *data; >> u16 len; >> @@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) >> return xdp_frame; >> } >> >> +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, >> + struct xdp_buff *xdp); >> void xdp_return_frame(struct xdp_frame *xdpf); >> void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); >> void xdp_return_buff(struct xdp_buff *xdp); >> diff --git a/net/core/filter.c b/net/core/filter.c >> index caa88955562e..05f574a3d690 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { >> .arg2_type = ARG_ANYTHING, >> }; >> >> +static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset) >> +{ >> + struct skb_shared_info *sinfo; >> + >> + if (unlikely(!xdp_buff_is_mb(xdp))) >> + 
return -EINVAL; >> + >> + sinfo = xdp_get_shared_info_from_buff(xdp); >> + if (offset >= 0) { >> + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; >> + int size; >> + >> + if (unlikely(offset > xdp_get_frag_tailroom(frag))) >> + return -EINVAL; >> + >> + size = skb_frag_size(frag); >> + memset(skb_frag_address(frag) + size, 0, offset); >> + skb_frag_size_set(frag, size + offset); >> + sinfo->data_len += offset; > > Can you add some comment on how this works? So today I call > bpf_xdp_adjust_tail() to add some trailer to my packet. > This looks like it adds tailroom to the last frag? But, then > how do I insert my trailer? I don't think we can without the > extra multi-buffer access support right. You are right, we need some kind of multi-buffer access helpers. > Also data_end will be unchanged yet it will return 0 so my > current programs will likely be a bit confused by this. Guess this is the tricky part, applications need to be multi-buffer aware. If current applications rely on bpf_xdp_adjust_tail(+) to determine maximum frame length this approach might not work. In this case, we might need an additional helper to do tail expansion with multi buffer support. But then the question arises how an mb-unaware application would behave in general when an mb packet is supplied? It would definitely not determine the correct packet length. >> + } else {
Eelco Chaudron wrote: > > > On 23 Jun 2021, at 1:37, John Fastabend wrote: > > > Lorenzo Bianconi wrote: > >> From: Eelco Chaudron <echaudro@redhat.com> > >> > >> This change adds support for tail growing and shrinking for XDP multi-buff. > >> > > > > It would be nice if the commit message gave us some details on how the > > growing/shrinking works in the multi-buff support. > > Will add this to the next rev. > > >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com> > >> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > >> --- > >> include/net/xdp.h | 7 ++++++ > >> net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ > >> net/core/xdp.c | 5 ++-- > >> 3 files changed, 72 insertions(+), 2 deletions(-) > >> > >> diff --git a/include/net/xdp.h b/include/net/xdp.h > >> index 935a6f83115f..3525801c6ed5 100644 > >> --- a/include/net/xdp.h > >> +++ b/include/net/xdp.h > >> @@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) > >> return (struct skb_shared_info *)xdp_data_hard_end(xdp); > >> } > >> > >> +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag) > >> +{ > >> + return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag); > >> +} > >> + > >> struct xdp_frame { > >> void *data; > >> u16 len; > >> @@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) > >> return xdp_frame; > >> } > >> > >> +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, > >> + struct xdp_buff *xdp); > >> void xdp_return_frame(struct xdp_frame *xdpf); > >> void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); > >> void xdp_return_buff(struct xdp_buff *xdp); > >> diff --git a/net/core/filter.c b/net/core/filter.c > >> index caa88955562e..05f574a3d690 100644 > >> --- a/net/core/filter.c > >> +++ b/net/core/filter.c > >> @@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { > >> .arg2_type = ARG_ANYTHING, > >> }; > >> > >> +static int 
bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset) > >> +{ > >> + struct skb_shared_info *sinfo; > >> + > >> + if (unlikely(!xdp_buff_is_mb(xdp))) > >> + return -EINVAL; > >> + > >> + sinfo = xdp_get_shared_info_from_buff(xdp); > >> + if (offset >= 0) { > >> + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; > >> + int size; > >> + > >> + if (unlikely(offset > xdp_get_frag_tailroom(frag))) > >> + return -EINVAL; > >> + > >> + size = skb_frag_size(frag); > >> + memset(skb_frag_address(frag) + size, 0, offset); > >> + skb_frag_size_set(frag, size + offset); > >> + sinfo->data_len += offset; > > > > Can you add some comment on how this works? So today I call > > bpf_xdp_adjust_tail() to add some trailer to my packet. > > This looks like it adds tailroom to the last frag? But, then > > how do I insert my trailer? I don't think we can without the > > extra multi-buffer access support right. > > You are right, we need some kind of multi-buffer access helpers. > > > Also data_end will be unchanged yet it will return 0 so my > > current programs will likely be a bit confused by this. > > Guess this is the tricky part, applications need to be multi-buffer aware. If current applications rely on bpf_xdp_adjust_tail(+) to determine maximum frame length this approach might not work. In this case, we might need an additional helper to do tail expansion with multi buffer support. > > But then the question arrives how would mb unaware application behave in general when an mb packet is supplied?? It would definitely not determine the correct packet length. Right that was my conclusion as well. Existing programs might have subtle side effects if they start running on multibuffer drivers as is. I don't have any good ideas though on how to handle this. > > >> + } else { >
On Thu, Jun 24, 2021 at 7:24 AM John Fastabend <john.fastabend@gmail.com> wrote: > > Eelco Chaudron wrote: > > > > > > On 23 Jun 2021, at 1:37, John Fastabend wrote: > > > > > Lorenzo Bianconi wrote: > > >> From: Eelco Chaudron <echaudro@redhat.com> > > >> > > >> This change adds support for tail growing and shrinking for XDP multi-buff. > > >> > > > > > > It would be nice if the commit message gave us some details on how the > > > growing/shrinking works in the multi-buff support. > > > > Will add this to the next rev. > > > > >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com> > > >> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > > >> --- > > >> include/net/xdp.h | 7 ++++++ > > >> net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ > > >> net/core/xdp.c | 5 ++-- > > >> 3 files changed, 72 insertions(+), 2 deletions(-) > > >> > > >> diff --git a/include/net/xdp.h b/include/net/xdp.h > > >> index 935a6f83115f..3525801c6ed5 100644 > > >> --- a/include/net/xdp.h > > >> +++ b/include/net/xdp.h > > >> @@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) > > >> return (struct skb_shared_info *)xdp_data_hard_end(xdp); > > >> } > > >> > > >> +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag) > > >> +{ > > >> + return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag); > > >> +} > > >> + > > >> struct xdp_frame { > > >> void *data; > > >> u16 len; > > >> @@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) > > >> return xdp_frame; > > >> } > > >> > > >> +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, > > >> + struct xdp_buff *xdp); > > >> void xdp_return_frame(struct xdp_frame *xdpf); > > >> void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); > > >> void xdp_return_buff(struct xdp_buff *xdp); > > >> diff --git a/net/core/filter.c b/net/core/filter.c > > >> index caa88955562e..05f574a3d690 100644 > > >> --- a/net/core/filter.c > > >> +++ 
b/net/core/filter.c > > >> @@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { > > >> .arg2_type = ARG_ANYTHING, > > >> }; > > >> > > >> +static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset) > > >> +{ > > >> + struct skb_shared_info *sinfo; > > >> + > > >> + if (unlikely(!xdp_buff_is_mb(xdp))) > > >> + return -EINVAL; > > >> + > > >> + sinfo = xdp_get_shared_info_from_buff(xdp); > > >> + if (offset >= 0) { > > >> + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; > > >> + int size; > > >> + > > >> + if (unlikely(offset > xdp_get_frag_tailroom(frag))) > > >> + return -EINVAL; > > >> + > > >> + size = skb_frag_size(frag); > > >> + memset(skb_frag_address(frag) + size, 0, offset); > > >> + skb_frag_size_set(frag, size + offset); > > >> + sinfo->data_len += offset; > > > > > > Can you add some comment on how this works? So today I call > > > bpf_xdp_adjust_tail() to add some trailer to my packet. > > > This looks like it adds tailroom to the last frag? But, then > > > how do I insert my trailer? I don't think we can without the > > > extra multi-buffer access support right. > > > > You are right, we need some kind of multi-buffer access helpers. > > > > > Also data_end will be unchanged yet it will return 0 so my > > > current programs will likely be a bit confused by this. > > > > Guess this is the tricky part, applications need to be multi-buffer aware. If current applications rely on bpf_xdp_adjust_tail(+) to determine maximum frame length this approach might not work. In this case, we might need an additional helper to do tail expansion with multi buffer support. > > > > But then the question arrives how would mb unaware application behave in general when an mb packet is supplied?? It would definitely not determine the correct packet length. > > Right that was my conclusion as well. Existing programs might > have subtle side effects if they start running on multibuffer > drivers as is. 
I don't have any good ideas though on how > to handle this. > Would it be possible to detect multibuffer awareness of a program at load (or attach) time, perhaps by looking for the use of the new multibuffer helpers? That might make it possible to reject a non-multibuffer-aware program on multibuffer drivers (or maybe even put the driver into a non-multibuffer mode at attach time), or at the very least issue a warning? > > > > >> + } else { > > > >
> Eelco Chaudron wrote: > > > > > > On 23 Jun 2021, at 1:37, John Fastabend wrote: > > > > > Lorenzo Bianconi wrote: > > >> From: Eelco Chaudron <echaudro@redhat.com> > > >> > > >> This change adds support for tail growing and shrinking for XDP multi-buff. > > >> > > > > > > It would be nice if the commit message gave us some details on how the > > > growing/shrinking works in the multi-buff support. [...] > > Guess this is the tricky part, applications need to be multi-buffer aware. If current applications rely on bpf_xdp_adjust_tail(+) to determine maximum frame length this approach might not work. In this case, we might need an additional helper to do tail expansion with multi buffer support. > > > > But then the question arrives how would mb unaware application behave in general when an mb packet is supplied?? It would definitely not determine the correct packet length. > > Right that was my conclusion as well. Existing programs might > have subtle side effects if they start running on multibuffer > drivers as is. I don't have any good ideas though on how > to handle this. what about checking the program capabilities at load time (e.g. with a special program type) and disable mb feature if the bpf program is not mb-aware? (e.g. forbid to set the MTU greater than 1500B in xdp mode). Regards, Lorenzo > > > > > >> + } else { > > > >
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes: >> Eelco Chaudron wrote: >> > >> > >> > On 23 Jun 2021, at 1:37, John Fastabend wrote: >> > >> > > Lorenzo Bianconi wrote: >> > >> From: Eelco Chaudron <echaudro@redhat.com> >> > >> >> > >> This change adds support for tail growing and shrinking for XDP multi-buff. >> > >> >> > > >> > > It would be nice if the commit message gave us some details on how the >> > > growing/shrinking works in the multi-buff support. > [...] >> > Guess this is the tricky part, applications need to be multi-buffer aware. If current applications rely on bpf_xdp_adjust_tail(+) to determine maximum frame length this approach might not work. In this case, we might need an additional helper to do tail expansion with multi buffer support. >> > >> > But then the question arrives how would mb unaware application behave in general when an mb packet is supplied?? It would definitely not determine the correct packet length. >> >> Right that was my conclusion as well. Existing programs might >> have subtle side effects if they start running on multibuffer >> drivers as is. I don't have any good ideas though on how >> to handle this. > > what about checking the program capabilities at load time (e.g. with a > special program type) and disable mb feature if the bpf program is not > mb-aware? (e.g. forbid to set the MTU greater than 1500B in xdp mode). So what happens when that legacy program runs on a veth and gets an mb-enabled frame redirected into it? :) -Toke
Changing the subject to address this point specifically: > Right that was my conclusion as well. Existing programs might have > subtle side effects if they start running on multibuffer drivers as > is. I don't have any good ideas though on how to handle this. So I had a chat about this with Lorenzo, Eelco and Jesper today, and promised I'd summarise our discussion to you all, so this is my attempt at that. Please excuse the long email, I'm just trying to be comprehensive :) So first off, a problem description: If an existing XDP program is exposed to an xdp_buff that is really a multi-buffer, it may end up with subtle and hard-to-debug bugs: If it's parsing the packet it'll only see part of the payload and not be aware of that fact, and if it's calculating the packet length, that will also be wrong (only counting the first fragment). So what to do about this? First of all, to do anything about it, XDP programs need to be able to declare themselves "multi-buffer aware" (but see point 1 below). We could try to auto-detect it in the verifier by which helpers the program is using, but since existing programs could be perfectly happy to just keep running, it probably needs to be something the program communicates explicitly. One option is to use the expected_attach_type to encode this; programs can then declare it in the source by section name, or the userspace loader can set the type for existing programs if needed. With this, the kernel will know if a given XDP program is multi-buff aware and can decide what to do with that information. For this we came up with basically three options: 1. Do nothing. This would make it up to users / sysadmins to avoid anything breaking by manually making sure to not enable multi-buffer support while loading any XDP programs that will malfunction if presented with an mb frame. This will probably break in interesting ways, but it's nice and simple from an implementation PoV. 
With this we don't need the declaration discussed above either. 2. Add a check at runtime and drop the frames if they are mb-enabled and the program doesn't understand it. This is relatively simple to implement, but it also makes for difficult-to-understand issues (why are my packets suddenly being dropped?), and it will incur runtime overhead. 3. Reject loading of programs that are not MB-aware when running in an MB-enabled mode. This would make things break in more obvious ways, and still allow a userspace loader to declare a program "MB-aware" to force it to run if necessary. The problem then becomes at what level to block this? Doing this at the driver level is not enough: while a particular driver knows if it's running in multi-buff mode, we can't know for sure if a particular XDP program is multi-buff aware at attach time: it could be tail-calling other programs, or redirecting packets to another interface where it will be processed by a non-MB aware program. So another option is to make it a global toggle: e.g., create a new sysctl to enable multi-buffer. If this is set, reject loading any XDP program that doesn't support multi-buffer mode, and if it's unset, disable multi-buffer mode in all drivers. This will make it explicit when the multi-buffer mode is used, and prevent any accidental subtle malfunction of existing XDP programs. The drawback is that it's a mode switch, so more configuration complexity. None of these options are ideal, of course, but I hope the above explanation at least makes sense. If anyone has any better ideas (or can spot any flaws in the reasoning above) please don't hesitate to let us know! -Toke
diff --git a/include/net/xdp.h b/include/net/xdp.h index 935a6f83115f..3525801c6ed5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp) return (struct skb_shared_info *)xdp_data_hard_end(xdp); } +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag) +{ + return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag); +} + struct xdp_frame { void *data; u16 len; @@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); diff --git a/net/core/filter.c b/net/core/filter.c index caa88955562e..05f574a3d690 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset) +{ + struct skb_shared_info *sinfo; + + if (unlikely(!xdp_buff_is_mb(xdp))) + return -EINVAL; + + sinfo = xdp_get_shared_info_from_buff(xdp); + if (offset >= 0) { + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; + int size; + + if (unlikely(offset > xdp_get_frag_tailroom(frag))) + return -EINVAL; + + size = skb_frag_size(frag); + memset(skb_frag_address(frag) + size, 0, offset); + skb_frag_size_set(frag, size + offset); + sinfo->data_len += offset; + } else { + int i, n_frags_free = 0, len_free = 0; + + offset = abs(offset); + if (unlikely(offset > ((int)(xdp->data_end - xdp->data) + + sinfo->data_len - ETH_HLEN))) + return -EINVAL; + + for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { + skb_frag_t *frag = &sinfo->frags[i]; + int size = skb_frag_size(frag); + int shrink = min_t(int, offset, size); + + len_free += shrink; + offset -= 
shrink; + + if (unlikely(size == shrink)) { + struct page *page = skb_frag_page(frag); + + __xdp_return(page_address(page), &xdp->rxq->mem, + false, NULL); + n_frags_free++; + } else { + skb_frag_size_set(frag, size - shrink); + break; + } + } + sinfo->nr_frags -= n_frags_free; + sinfo->data_len -= len_free; + + if (unlikely(!sinfo->nr_frags)) + xdp_buff_clear_mb(xdp); + + if (unlikely(offset > 0)) + xdp->data_end -= offset; + } + + return 0; +} + BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) { void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ void *data_end = xdp->data_end + offset; + if (unlikely(xdp_buff_is_mb(xdp))) + return bpf_xdp_mb_adjust_tail(xdp, offset); + /* Notice that xdp_data_hard_end have reserved some tailroom */ if (unlikely(data_end > data_hard_end)) return -EINVAL; diff --git a/net/core/xdp.c b/net/core/xdp.c index 71bedf6049a1..ffd70d3e9e5d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -338,8 +338,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, - struct xdp_buff *xdp) +void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -372,6 +372,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, break; } } +EXPORT_SYMBOL_GPL(__xdp_return); void xdp_return_frame(struct xdp_frame *xdpf) {