diff mbox series

[v8,bpf-next,08/14] bpf: add multi-buff support to the bpf_xdp_adjust_tail() API

Message ID 427bd05d147a247fc30fd438be94b5d51845b05f.1617885385.git.lorenzo@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series mvneta: introduce XDP multi-buffer support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 6 maintainers not CCed: yhs@fb.com kpsingh@kernel.org hawk@kernel.org andrii@kernel.org kafai@fb.com songliubraving@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 5112 this patch: 4999
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 85 lines checked
netdev/build_allmodconfig_warn fail Errors and warnings before: 5282 this patch: 5234
netdev/header_inline success Link

Commit Message

Lorenzo Bianconi April 8, 2021, 12:51 p.m. UTC
From: Eelco Chaudron <echaudro@redhat.com>

This change adds support for tail growing and shrinking for XDP multi-buff.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/net/xdp.h |  5 ++++
 net/core/filter.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

Comments

Vladimir Oltean April 8, 2021, 7:15 p.m. UTC | #1
On Thu, Apr 08, 2021 at 02:51:00PM +0200, Lorenzo Bianconi wrote:
> From: Eelco Chaudron <echaudro@redhat.com>
> 
> This change adds support for tail growing and shrinking for XDP multi-buff.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/xdp.h |  5 ++++
>  net/core/filter.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 68 insertions(+)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index c8eb7cf4ebed..55751cf2badf 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -159,6 +159,11 @@ static inline void xdp_set_frag_size(skb_frag_t *frag, u32 size)
>  	frag->bv_len = size;
>  }
>  
> +static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag)
> +{
> +	return PAGE_SIZE - xdp_get_frag_size(frag) - xdp_get_frag_offset(frag);
> +}
> +

This is an interesting requirement. Must an XDP frame fragment be a full
PAGE_SIZE? enetc does not fulfill it, and I suspect that none of the
drivers with a "shared page" memory model will.

>  struct xdp_frame {
>  	void *data;
>  	u16 len;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index cae56d08a670..c4eb1392f88e 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3855,11 +3855,74 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
>  	.arg2_type	= ARG_ANYTHING,
>  };
>  
> +static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset)
> +{
> +	struct xdp_shared_info *xdp_sinfo = xdp_get_shared_info_from_buff(xdp);
> +
> +	if (unlikely(xdp_sinfo->nr_frags == 0))
> +		return -EINVAL;

This function is called if xdp->mb is true, but we check whether
nr_frags != 0? Is this condition possible?

> +	if (offset >= 0) {
> +		skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags - 1];
> +		int size;
> +
> +		if (unlikely(offset > xdp_get_frag_tailroom(frag)))
> +			return -EINVAL;
> +
> +		size = xdp_get_frag_size(frag);
> +		memset(xdp_get_frag_address(frag) + size, 0, offset);
> +		xdp_set_frag_size(frag, size + offset);
> +		xdp_sinfo->data_length += offset;
> +	} else {
> +		int i, frags_to_free = 0;
> +
> +		offset = abs(offset);
> +
> +		if (unlikely(offset > ((int)(xdp->data_end - xdp->data) +
> +				       xdp_sinfo->data_length -
> +				       ETH_HLEN)))

I think code alignment should be to xdp->data_end, not to (int).

Also: should we have some sort of helper for calculating the total
length of an xdp_frame (head + frags)? Maybe it's just me, but I find it
slightly confusing that xdp_sinfo->data_length does not account for
everything.

> +			return -EINVAL;
> +
> +		for (i = xdp_sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
> +			skb_frag_t *frag = &xdp_sinfo->frags[i];
> +			int size = xdp_get_frag_size(frag);
> +			int shrink = min_t(int, offset, size);
> +
> +			offset -= shrink;
> +			if (likely(size - shrink > 0)) {
> +				/* When updating the final fragment we have
> +				 * to adjust the data_length in line.
> +				 */
> +				xdp_sinfo->data_length -= shrink;
> +				xdp_set_frag_size(frag, size - shrink);
> +				break;
> +			}
> +
> +			/* When we free the fragments,
> +			 * xdp_return_frags_from_buff() will take care
> +			 * of updating the xdp share info data_length.

s/xdp share info data_length/data_length from xdp_shared_info/

> +			 */
> +			frags_to_free++;
> +		}
> +
> +		if (unlikely(frags_to_free))
> +			xdp_return_num_frags_from_buff(xdp, frags_to_free);
> +
> +		if (unlikely(offset > 0))
> +			xdp->data_end -= offset;
> +	}
> +
> +	return 0;
> +}
> +
>  BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
>  {
>  	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
>  	void *data_end = xdp->data_end + offset;
>  
> +	if (unlikely(xdp->mb))
> +		return bpf_xdp_mb_adjust_tail(xdp, offset);
> +
>  	/* Notice that xdp_data_hard_end have reserved some tailroom */
>  	if (unlikely(data_end > data_hard_end))
>  		return -EINVAL;
> -- 
> 2.30.2
>
Vladimir Oltean April 8, 2021, 8:54 p.m. UTC | #2
On Thu, Apr 08, 2021 at 10:15:47PM +0300, Vladimir Oltean wrote:
> > +		if (unlikely(offset > ((int)(xdp->data_end - xdp->data) +
> > +				       xdp_sinfo->data_length -
> > +				       ETH_HLEN)))
> 
> Also: should we have some sort of helper for calculating the total
> length of an xdp_frame (head + frags)? Maybe it's just me, but I find it
> slightly confusing that xdp_sinfo->data_length does not account for
> everything.

I see now that xdp_buff :: frame_length is added in patch 10. It is a
bit strange to not use it wherever you can? Could patch 10 be moved
before patch 8?
Lorenzo Bianconi April 9, 2021, 6:13 p.m. UTC | #3
> On Thu, Apr 08, 2021 at 10:15:47PM +0300, Vladimir Oltean wrote:
> > > +		if (unlikely(offset > ((int)(xdp->data_end - xdp->data) +
> > > +				       xdp_sinfo->data_length -
> > > +				       ETH_HLEN)))
> > 
> > Also: should we have some sort of helper for calculating the total
> > length of an xdp_frame (head + frags)? Maybe it's just me, but I find it
> > slightly confusing that xdp_sinfo->data_length does not account for
> > everything.
> 
> I see now that xdp_buff :: frame_length is added in patch 10. It is a
> bit strange to not use it wherever you can? Could patch 10 be moved
> before patch 8?

yes, I agree we can change the patch order

Regards,
Lorenzo
diff mbox series

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index c8eb7cf4ebed..55751cf2badf 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -159,6 +159,11 @@  static inline void xdp_set_frag_size(skb_frag_t *frag, u32 size)
 	frag->bv_len = size;
 }
 
+static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag)
+{
+	return PAGE_SIZE - xdp_get_frag_size(frag) - xdp_get_frag_offset(frag);
+}
+
 struct xdp_frame {
 	void *data;
 	u16 len;
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..c4eb1392f88e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3855,11 +3855,74 @@  static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset)
+{
+	struct xdp_shared_info *xdp_sinfo = xdp_get_shared_info_from_buff(xdp);
+
+	if (unlikely(xdp_sinfo->nr_frags == 0))
+		return -EINVAL;
+
+	if (offset >= 0) {
+		skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags - 1];
+		int size;
+
+		if (unlikely(offset > xdp_get_frag_tailroom(frag)))
+			return -EINVAL;
+
+		size = xdp_get_frag_size(frag);
+		memset(xdp_get_frag_address(frag) + size, 0, offset);
+		xdp_set_frag_size(frag, size + offset);
+		xdp_sinfo->data_length += offset;
+	} else {
+		int i, frags_to_free = 0;
+
+		offset = abs(offset);
+
+		if (unlikely(offset > ((int)(xdp->data_end - xdp->data) +
+				       xdp_sinfo->data_length -
+				       ETH_HLEN)))
+			return -EINVAL;
+
+		for (i = xdp_sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+			skb_frag_t *frag = &xdp_sinfo->frags[i];
+			int size = xdp_get_frag_size(frag);
+			int shrink = min_t(int, offset, size);
+
+			offset -= shrink;
+			if (likely(size - shrink > 0)) {
+				/* When updating the final fragment we have
+				 * to adjust the data_length in line.
+				 */
+				xdp_sinfo->data_length -= shrink;
+				xdp_set_frag_size(frag, size - shrink);
+				break;
+			}
+
+			/* When we free the fragments,
+			 * xdp_return_frags_from_buff() will take care
+			 * of updating the xdp share info data_length.
+			 */
+			frags_to_free++;
+		}
+
+		if (unlikely(frags_to_free))
+			xdp_return_num_frags_from_buff(xdp, frags_to_free);
+
+		if (unlikely(offset > 0))
+			xdp->data_end -= offset;
+	}
+
+	return 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
 	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
 	void *data_end = xdp->data_end + offset;
 
+	if (unlikely(xdp->mb))
+		return bpf_xdp_mb_adjust_tail(xdp, offset);
+
 	/* Notice that xdp_data_hard_end have reserved some tailroom */
 	if (unlikely(data_end > data_hard_end))
 		return -EINVAL;