diff mbox series

[v5,bpf-next,01/14] xdp: introduce mb in xdp_buff/xdp_frame

Message ID 7e7dbe0c739640b053c930d3cd22ab7588d6aa3c.1607349924.git.lorenzo@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series mvneta: introduce XDP multi-buffer support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 7649 this patch: 7649
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 7759 this patch: 7759
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

lorenzo@kernel.org Dec. 7, 2020, 4:32 p.m. UTC
Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data structure
in order to specify if this is a linear buffer (mb = 0) or a multi-buffer
frame (mb = 1). In the latter case the shared_info area at the end of the
first buffer is been properly initialized to link together subsequent
buffers.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/net/xdp.h | 8 ++++++--
 net/core/xdp.c    | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

Comments

Alexander Duyck Dec. 7, 2020, 9:16 p.m. UTC | #1
On Mon, Dec 7, 2020 at 8:36 AM Lorenzo Bianconi <lorenzo@kernel.org> wrote:
>
> Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data structure
> in order to specify if this is a linear buffer (mb = 0) or a multi-buffer
> frame (mb = 1). In the latter case the shared_info area at the end of the
> first buffer is been properly initialized to link together subsequent
> buffers.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/xdp.h | 8 ++++++--
>  net/core/xdp.c    | 1 +
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 700ad5db7f5d..70559720ff44 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -73,7 +73,8 @@ struct xdp_buff {
>         void *data_hard_start;
>         struct xdp_rxq_info *rxq;
>         struct xdp_txq_info *txq;
> -       u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
> +       u32 frame_sz:31; /* frame size to deduce data_hard_end/reserved tailroom*/
> +       u32 mb:1; /* xdp non-linear buffer */
>  };
>

If we are really going to do something like this I say we should just
rip a swath of bits out instead of just grabbing one. We are already
cutting the size down then we should just decide on the minimum size
that is acceptable and just jump to that instead of just stealing one
bit at a time. It looks like we already have differences between the
size here and frame_size in xdp_frame.

If we have to steal a bit why not look at something like one of the
lower 2/3 bits in rxq? You could then do the same thing using dev_rx
in a similar fashion instead of stealing from a bit that is likely to
be used in multiple spots and modifying like this adds extra overhead
to?

>  /* Reserve memory area at end-of data area.
> @@ -97,7 +98,8 @@ struct xdp_frame {
>         u16 len;
>         u16 headroom;
>         u32 metasize:8;
> -       u32 frame_sz:24;
> +       u32 frame_sz:23;
> +       u32 mb:1; /* xdp non-linear frame */
>         /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
>          * while mem info is valid on remote CPU.
>          */

Again, if we are just going to start shrinking frame_sz we should
probably define where we are going to limit ourselves to and just go
straight to that value. Otherwise we are going to start jeopardizing
backwards compatibility at some point when we steal too many bits.

> @@ -154,6 +156,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
>         xdp->data_end = frame->data + frame->len;
>         xdp->data_meta = frame->data - frame->metasize;
>         xdp->frame_sz = frame->frame_sz;
> +       xdp->mb = frame->mb;
>  }
>
>  static inline
> @@ -180,6 +183,7 @@ int xdp_update_frame_from_buff(struct xdp_buff *xdp,
>         xdp_frame->headroom = headroom - sizeof(*xdp_frame);
>         xdp_frame->metasize = metasize;
>         xdp_frame->frame_sz = xdp->frame_sz;
> +       xdp_frame->mb = xdp->mb;
>
>         return 0;
>  }
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 17ffd33c6b18..79dd45234e4d 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -509,6 +509,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
>         xdpf->headroom = 0;
>         xdpf->metasize = metasize;
>         xdpf->frame_sz = PAGE_SIZE;
> +       xdpf->mb = xdp->mb;
>         xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
>
>         xsk_buff_free(xdp);

At this point all you are doing is moving a meaningless flag. I would
think we would want to wait on adding this code until there is some
meaning behind the bit since it doesn't make sense to convert a
multi-buffer xdp_frame to a buffer. If nothing else it really feels
like there is some exception handling missing here as I would expect
that conversion of a multi-buffer frame should fail since you cannot
convert something from multiple to a single without having to redo
allocations and/or linearizing it.
Saeed Mahameed Dec. 7, 2020, 11:03 p.m. UTC | #2
On Mon, 2020-12-07 at 13:16 -0800, Alexander Duyck wrote:
> On Mon, Dec 7, 2020 at 8:36 AM Lorenzo Bianconi <lorenzo@kernel.org>
> wrote:
> > Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data
> > structure
> > in order to specify if this is a linear buffer (mb = 0) or a multi-
> > buffer
> > frame (mb = 1). In the latter case the shared_info area at the end
> > of the
> > first buffer is been properly initialized to link together
> > subsequent
> > buffers.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  include/net/xdp.h | 8 ++++++--
> >  net/core/xdp.c    | 1 +
> >  2 files changed, 7 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > index 700ad5db7f5d..70559720ff44 100644
> > --- a/include/net/xdp.h
> > +++ b/include/net/xdp.h
> > @@ -73,7 +73,8 @@ struct xdp_buff {
> >         void *data_hard_start;
> >         struct xdp_rxq_info *rxq;
> >         struct xdp_txq_info *txq;
> > -       u32 frame_sz; /* frame size to deduce
> > data_hard_end/reserved tailroom*/
> > +       u32 frame_sz:31; /* frame size to deduce
> > data_hard_end/reserved tailroom*/
> > +       u32 mb:1; /* xdp non-linear buffer */
> >  };
> > 
> 
> If we are really going to do something like this I say we should just
> rip a swath of bits out instead of just grabbing one. We are already
> cutting the size down then we should just decide on the minimum size
> that is acceptable and just jump to that instead of just stealing one
> bit at a time. It looks like we already have differences between the
> size here and frame_size in xdp_frame.
> 

+1

> If we have to steal a bit why not look at something like one of the
> lower 2/3 bits in rxq? You could then do the same thing using dev_rx
> in a similar fashion instead of stealing from a bit that is likely to
> be used in multiple spots and modifying like this adds extra overhead
> to?
> 

What do you mean in rxq ? from the pointer ?
Alexander Duyck Dec. 8, 2020, 3:16 a.m. UTC | #3
On Mon, Dec 7, 2020 at 3:03 PM Saeed Mahameed <saeed@kernel.org> wrote:
>
> On Mon, 2020-12-07 at 13:16 -0800, Alexander Duyck wrote:
> > On Mon, Dec 7, 2020 at 8:36 AM Lorenzo Bianconi <lorenzo@kernel.org>
> > wrote:
> > > Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data
> > > structure
> > > in order to specify if this is a linear buffer (mb = 0) or a multi-
> > > buffer
> > > frame (mb = 1). In the latter case the shared_info area at the end
> > > of the
> > > first buffer is been properly initialized to link together
> > > subsequent
> > > buffers.
> > >
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > ---
> > >  include/net/xdp.h | 8 ++++++--
> > >  net/core/xdp.c    | 1 +
> > >  2 files changed, 7 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > > index 700ad5db7f5d..70559720ff44 100644
> > > --- a/include/net/xdp.h
> > > +++ b/include/net/xdp.h
> > > @@ -73,7 +73,8 @@ struct xdp_buff {
> > >         void *data_hard_start;
> > >         struct xdp_rxq_info *rxq;
> > >         struct xdp_txq_info *txq;
> > > -       u32 frame_sz; /* frame size to deduce
> > > data_hard_end/reserved tailroom*/
> > > +       u32 frame_sz:31; /* frame size to deduce
> > > data_hard_end/reserved tailroom*/
> > > +       u32 mb:1; /* xdp non-linear buffer */
> > >  };
> > >
> >
> > If we are really going to do something like this I say we should just
> > rip a swath of bits out instead of just grabbing one. We are already
> > cutting the size down then we should just decide on the minimum size
> > that is acceptable and just jump to that instead of just stealing one
> > bit at a time. It looks like we already have differences between the
> > size here and frame_size in xdp_frame.
> >
>
> +1
>
> > If we have to steal a bit why not look at something like one of the
> > lower 2/3 bits in rxq? You could then do the same thing using dev_rx
> > in a similar fashion instead of stealing from a bit that is likely to
> > be used in multiple spots and modifying like this adds extra overhead
> > to?
> >
>
> What do you mean in rxq ? from the pointer ?

Yeah, the pointers have a few bits that are guaranteed 0 and in my
mind reusing the lower bits from a 4 or 8 byte aligned pointer would
make more sense then stealing the upper bits from the size of the
frame.
Saeed Mahameed Dec. 8, 2020, 6:49 a.m. UTC | #4
On Mon, 2020-12-07 at 19:16 -0800, Alexander Duyck wrote:
> On Mon, Dec 7, 2020 at 3:03 PM Saeed Mahameed <saeed@kernel.org>
> wrote:
> > On Mon, 2020-12-07 at 13:16 -0800, Alexander Duyck wrote:
> > > On Mon, Dec 7, 2020 at 8:36 AM Lorenzo Bianconi <
> > > lorenzo@kernel.org>
> > > wrote:
> > > > Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data
> > > > structure
> > > > in order to specify if this is a linear buffer (mb = 0) or a
> > > > multi-
> > > > buffer
> > > > frame (mb = 1). In the latter case the shared_info area at the
> > > > end
> > > > of the
> > > > first buffer is been properly initialized to link together
> > > > subsequent
> > > > buffers.
> > > > 
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > ---
> > > >  include/net/xdp.h | 8 ++++++--
> > > >  net/core/xdp.c    | 1 +
> > > >  2 files changed, 7 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > > > index 700ad5db7f5d..70559720ff44 100644
> > > > --- a/include/net/xdp.h
> > > > +++ b/include/net/xdp.h
> > > > @@ -73,7 +73,8 @@ struct xdp_buff {
> > > >         void *data_hard_start;
> > > >         struct xdp_rxq_info *rxq;
> > > >         struct xdp_txq_info *txq;
> > > > -       u32 frame_sz; /* frame size to deduce
> > > > data_hard_end/reserved tailroom*/
> > > > +       u32 frame_sz:31; /* frame size to deduce
> > > > data_hard_end/reserved tailroom*/
> > > > +       u32 mb:1; /* xdp non-linear buffer */
> > > >  };
> > > > 
> > > 
> > > If we are really going to do something like this I say we should
> > > just
> > > rip a swath of bits out instead of just grabbing one. We are
> > > already
> > > cutting the size down then we should just decide on the minimum
> > > size
> > > that is acceptable and just jump to that instead of just stealing
> > > one
> > > bit at a time. It looks like we already have differences between
> > > the
> > > size here and frame_size in xdp_frame.
> > > 
> > 
> > +1
> > 
> > > If we have to steal a bit why not look at something like one of
> > > the
> > > lower 2/3 bits in rxq? You could then do the same thing using
> > > dev_rx
> > > in a similar fashion instead of stealing from a bit that is
> > > likely to
> > > be used in multiple spots and modifying like this adds extra
> > > overhead
> > > to?
> > > 
> > 
> > What do you mean in rxq ? from the pointer ?
> 
> Yeah, the pointers have a few bits that are guaranteed 0 and in my
> mind reusing the lower bits from a 4 or 8 byte aligned pointer would
> make more sense then stealing the upper bits from the size of the
> frame.

Ha, i can't imagine how accessing that pointer would look like ..
is possible to define the pointer as a bit-field and just access it
normally ? or do we need to fix it up every time we need to access it ?
will gcc/static checkers complain about wrong pointer type ?
Jesper Dangaard Brouer Dec. 8, 2020, 9:47 a.m. UTC | #5
On Mon, 07 Dec 2020 22:49:55 -0800
Saeed Mahameed <saeed@kernel.org> wrote:

> On Mon, 2020-12-07 at 19:16 -0800, Alexander Duyck wrote:
> > On Mon, Dec 7, 2020 at 3:03 PM Saeed Mahameed <saeed@kernel.org>
> > wrote:  
> > > On Mon, 2020-12-07 at 13:16 -0800, Alexander Duyck wrote:  
> > > > On Mon, Dec 7, 2020 at 8:36 AM Lorenzo Bianconi <  
> > > > lorenzo@kernel.org>  
> > > > wrote:  
> > > > > Introduce multi-buffer bit (mb) in xdp_frame/xdp_buffer data
> > > > > structure
> > > > > in order to specify if this is a linear buffer (mb = 0) or a
> > > > > multi-
> > > > > buffer
> > > > > frame (mb = 1). In the latter case the shared_info area at the
> > > > > end
> > > > > of the
> > > > > first buffer is been properly initialized to link together
> > > > > subsequent
> > > > > buffers.
> > > > > 
> > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > > ---
> > > > >  include/net/xdp.h | 8 ++++++--
> > > > >  net/core/xdp.c    | 1 +
> > > > >  2 files changed, 7 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > > > > index 700ad5db7f5d..70559720ff44 100644
> > > > > --- a/include/net/xdp.h
> > > > > +++ b/include/net/xdp.h
> > > > > @@ -73,7 +73,8 @@ struct xdp_buff {
> > > > >         void *data_hard_start;
> > > > >         struct xdp_rxq_info *rxq;
> > > > >         struct xdp_txq_info *txq;
> > > > > -       u32 frame_sz; /* frame size to deduce
> > > > > data_hard_end/reserved tailroom*/
> > > > > +       u32 frame_sz:31; /* frame size to deduce
> > > > > data_hard_end/reserved tailroom*/
> > > > > +       u32 mb:1; /* xdp non-linear buffer */
> > > > >  };
> > > > >   
> > > > 
> > > > If we are really going to do something like this I say we should
> > > > just
> > > > rip a swath of bits out instead of just grabbing one. We are
> > > > already
> > > > cutting the size down then we should just decide on the minimum
> > > > size
> > > > that is acceptable and just jump to that instead of just stealing
> > > > one
> > > > bit at a time. It looks like we already have differences between
> > > > the
> > > > size here and frame_size in xdp_frame.
> > > >   
> > > 
> > > +1
> > >   
> > > > If we have to steal a bit why not look at something like one of
> > > > the
> > > > lower 2/3 bits in rxq? You could then do the same thing using
> > > > dev_rx
> > > > in a similar fashion instead of stealing from a bit that is
> > > > likely to
> > > > be used in multiple spots and modifying like this adds extra
> > > > overhead
> > > > to?
> > > >   
> > > 
> > > What do you mean in rxq ? from the pointer ?  
> > 
> > Yeah, the pointers have a few bits that are guaranteed 0 and in my
> > mind reusing the lower bits from a 4 or 8 byte aligned pointer would
> > make more sense then stealing the upper bits from the size of the
> > frame.  
> 
> Ha, i can't imagine how accessing that pointer would look like ..
> is possible to define the pointer as a bit-field and just access it
> normally ? or do we need to fix it up every time we need to access it ?
> will gcc/static checkers complain about wrong pointer type ?

This is a pattern that is used all over the kernel.  Yes, it needs to
be fixed it up every time we access it.  In this case, we don't want to
to deploy this trick.  For two reason, (1) rxq is accessed by BPF
byte-code rewrite (which would also need to handle masking out the
bit), (2) this optimization is trading CPU cycles for saving space.

IIRC Alexei have already pointed out that the change to struct xdp_buff
looks suboptimal.  Why don't you simply add a u8 with the info.

The general point is that struct xdp_buff layout is for fast access,
and struct xdp_frame is a state compressed version of xdp_buff.  (Still
room in xdp_buff is limited to 64 bytes - one cacheline, which is
rather close according to pahole)

Thus, it is more okay to do these bit tricks in struct xdp_frame.  For
xdp_frame, it might be better to take some room/space from the member
'mem' (struct xdp_mem_info).  (Would it help later that multi-buffer
bit is officially part of struct xdp_mem_info, when later freeing the
memory backing the frame?)
diff mbox series

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 700ad5db7f5d..70559720ff44 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -73,7 +73,8 @@  struct xdp_buff {
 	void *data_hard_start;
 	struct xdp_rxq_info *rxq;
 	struct xdp_txq_info *txq;
-	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
+	u32 frame_sz:31; /* frame size to deduce data_hard_end/reserved tailroom*/
+	u32 mb:1; /* xdp non-linear buffer */
 };
 
 /* Reserve memory area at end-of data area.
@@ -97,7 +98,8 @@  struct xdp_frame {
 	u16 len;
 	u16 headroom;
 	u32 metasize:8;
-	u32 frame_sz:24;
+	u32 frame_sz:23;
+	u32 mb:1; /* xdp non-linear frame */
 	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
 	 * while mem info is valid on remote CPU.
 	 */
@@ -154,6 +156,7 @@  void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
 	xdp->data_end = frame->data + frame->len;
 	xdp->data_meta = frame->data - frame->metasize;
 	xdp->frame_sz = frame->frame_sz;
+	xdp->mb = frame->mb;
 }
 
 static inline
@@ -180,6 +183,7 @@  int xdp_update_frame_from_buff(struct xdp_buff *xdp,
 	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
 	xdp_frame->metasize = metasize;
 	xdp_frame->frame_sz = xdp->frame_sz;
+	xdp_frame->mb = xdp->mb;
 
 	return 0;
 }
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 17ffd33c6b18..79dd45234e4d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -509,6 +509,7 @@  struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
 	xdpf->headroom = 0;
 	xdpf->metasize = metasize;
 	xdpf->frame_sz = PAGE_SIZE;
+	xdpf->mb = xdp->mb;
 	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
 
 	xsk_buff_free(xdp);