diff mbox series

[RFC,bpf-next,05/23] ice: Introduce ice_xdp_buff

Message ID 20230824192703.712881-6-larysa.zaremba@intel.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series XDP metadata via kfuncs for ice + mlx5 | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 pending Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 pending Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 pending Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1330 this patch: 1330
netdev/cc_maintainers warning 7 maintainers not CCed: hawk@kernel.org jesse.brandeburg@intel.com intel-wired-lan@lists.osuosl.org davem@davemloft.net anthony.l.nguyen@intel.com pabeni@redhat.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 1353 this patch: 1353
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1353 this patch: 1353
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 99 lines checked
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Larysa Zaremba Aug. 24, 2023, 7:26 p.m. UTC
In order to use XDP hints via kfuncs, we need to put the
RX descriptor and ring pointers right next to xdp_buff.
As in the hints implementations in other drivers, we achieve
this by putting xdp_buff into a child structure.

Currently, xdp_buff is stored in the ring structure,
so replace it with union that includes child structure.
This way enough memory is available while existing XDP code
remains isolated from hints.

Minimum size of the new child structure (ice_xdp_buff) is exactly
64 bytes (single cache line). To place it at the start of a cache line,
move 'next' field from CL1 to CL3, as it isn't used often. This still
leaves 128 bits available in CL3 for packet context extensions.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
 drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
 3 files changed, 38 insertions(+), 5 deletions(-)

Comments

Fijalkowski, Maciej Sept. 4, 2023, 3:32 p.m. UTC | #1
On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> In order to use XDP hints via kfuncs we need to put
> RX descriptor and ring pointers just next to xdp_buff.
> Same as in hints implementations in other drivers, we achieve
> this through putting xdp_buff into a child structure.

Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
if I'm reading this right.

> 
> Currently, xdp_buff is stored in the ring structure,
> so replace it with union that includes child structure.
> This way enough memory is available while existing XDP code
> remains isolated from hints.
> 
> Minimum size of the new child structure (ice_xdp_buff) is exactly
> 64 bytes (single cache line). To place it at the start of a cache line,
> move 'next' field from CL1 to CL3, as it isn't used often. This still
> leaves 128 bits available in CL3 for packet context extensions.

I believe ice_xdp_buff will be beefed up in later patches, so what is the
point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
CL anyway.

> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
>  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
>  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
>  3 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> index 40f2f6dabb81..4e6546d9cf85 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
>   * @xdp_prog: XDP program to run
>   * @xdp_ring: ring to be used for XDP_TX action
>   * @rx_buf: Rx buffer to store the XDP action
> + * @eop_desc: Last descriptor in packet to read metadata from
>   *
>   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
>   */
>  static void
>  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
>  	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> -	    struct ice_rx_buf *rx_buf)
> +	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
>  {
>  	unsigned int ret = ICE_XDP_PASS;
>  	u32 act;
> @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
>  	if (!xdp_prog)
>  		goto exit;
>  
> +	ice_xdp_meta_set_desc(xdp, eop_desc);

I am currently not sure whether, in the multi-buffer case, HW repeats all the
necessary info within each descriptor for every frag. IOW, shouldn't you be
using ice_rx_ring::first_desc?

Would be good to test hints for mbuf case for sure.

> +
>  	act = bpf_prog_run_xdp(xdp_prog, xdp);
>  	switch (act) {
>  	case XDP_PASS:
> @@ -1240,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
>  		if (ice_is_non_eop(rx_ring, rx_desc))
>  			continue;
>  
> -		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
> +		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
>  		if (rx_buf->act == ICE_XDP_PASS)
>  			goto construct_skb;
>  		total_rx_bytes += xdp_get_buff_len(xdp);
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index 166413fc33f4..d0ab2c4c0c91 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -257,6 +257,18 @@ enum ice_rx_dtype {
>  	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
>  };
>  
> +struct ice_pkt_ctx {
> +	const union ice_32b_rx_flex_desc *eop_desc;
> +};
> +
> +struct ice_xdp_buff {
> +	struct xdp_buff xdp_buff;
> +	struct ice_pkt_ctx pkt_ctx;
> +};
> +
> +/* Required for compatibility with xdp_buffs from xsk_pool */
> +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
> +
>  /* indices into GLINT_ITR registers */
>  #define ICE_RX_ITR	ICE_IDX_ITR0
>  #define ICE_TX_ITR	ICE_IDX_ITR1
> @@ -298,7 +310,6 @@ enum ice_dynamic_itr {
>  /* descriptor ring, associated with a VSI */
>  struct ice_rx_ring {
>  	/* CL1 - 1st cacheline starts here */
> -	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
>  	void *desc;			/* Descriptor ring memory */
>  	struct device *dev;		/* Used for DMA mapping */
>  	struct net_device *netdev;	/* netdev ring maps to */
> @@ -310,12 +321,19 @@ struct ice_rx_ring {
>  	u16 count;			/* Number of descriptors */
>  	u16 reg_idx;			/* HW register index of the ring */
>  	u16 next_to_alloc;
> -	/* CL2 - 2nd cacheline starts here */
> +
>  	union {
>  		struct ice_rx_buf *rx_buf;
>  		struct xdp_buff **xdp_buf;
>  	};
> -	struct xdp_buff xdp;
> +	/* CL2 - 2nd cacheline starts here */
> +	union {
> +		struct ice_xdp_buff xdp_ext;
> +		struct {
> +			struct xdp_buff xdp;
> +			struct ice_pkt_ctx pkt_ctx;
> +		};
> +	};
>  	/* CL3 - 3rd cacheline starts here */
>  	struct bpf_prog *xdp_prog;
>  	u16 rx_offset;
> @@ -325,6 +343,8 @@ struct ice_rx_ring {
>  	u16 next_to_clean;
>  	u16 first_desc;
>  
> +	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
> +
>  	/* stats structs */
>  	struct ice_ring_stats *ring_stats;
>  
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> index e1d49e1235b3..145883eec129 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> @@ -151,4 +151,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
>  		       struct sk_buff *skb);
>  void
>  ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
> +
> +static inline void
> +ice_xdp_meta_set_desc(struct xdp_buff *xdp,
> +		      union ice_32b_rx_flex_desc *eop_desc)
> +{
> +	struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
> +						    xdp_buff);
> +
> +	xdp_ext->pkt_ctx.eop_desc = eop_desc;
> +}
>  #endif /* !_ICE_TXRX_LIB_H_ */
> -- 
> 2.41.0
>
Larysa Zaremba Sept. 4, 2023, 6:11 p.m. UTC | #2
On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > In order to use XDP hints via kfuncs we need to put
> > RX descriptor and ring pointers just next to xdp_buff.
> > Same as in hints implementations in other drivers, we achieve
> > this through putting xdp_buff into a child structure.
> 
> Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> if i'm reading this right.
>

ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could 
replace pointer to xdp_buff, but not in reverse).

> > 
> > Currently, xdp_buff is stored in the ring structure,
> > so replace it with union that includes child structure.
> > This way enough memory is available while existing XDP code
> > remains isolated from hints.
> > 
> > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > 64 bytes (single cache line). To place it at the start of a cache line,
> > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > leaves 128 bits available in CL3 for packet context extensions.
> 
> I believe ice_xdp_buff will be beefed up in later patches, so what is the
> point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> CL anyway.
>

It is to at least keep xdp_buff and the descriptor pointer (used for every hint)
in a single CL; the other fields are situational.

> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> >  3 files changed, 38 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > index 40f2f6dabb81..4e6546d9cf85 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> >   * @xdp_prog: XDP program to run
> >   * @xdp_ring: ring to be used for XDP_TX action
> >   * @rx_buf: Rx buffer to store the XDP action
> > + * @eop_desc: Last descriptor in packet to read metadata from
> >   *
> >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> >   */
> >  static void
> >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> >  	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > -	    struct ice_rx_buf *rx_buf)
> > +	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> >  {
> >  	unsigned int ret = ICE_XDP_PASS;
> >  	u32 act;
> > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> >  	if (!xdp_prog)
> >  		goto exit;
> >  
> > +	ice_xdp_meta_set_desc(xdp, eop_desc);
> 
> I am currently not sure if for multi-buffer case HW repeats all the
> necessary info within each descriptor for every frag? IOW shouldn't you be
> using the ice_rx_ring::first_desc?
> 
> Would be good to test hints for mbuf case for sure.
>

In the skb path, we take metadata from the last descriptor only, so this should 
be fine. Really worth testing with mbuf though.

> > +
> >  	act = bpf_prog_run_xdp(xdp_prog, xdp);
> >  	switch (act) {
> >  	case XDP_PASS:
> > @@ -1240,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
> >  		if (ice_is_non_eop(rx_ring, rx_desc))
> >  			continue;
> >  
> > -		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
> > +		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
> >  		if (rx_buf->act == ICE_XDP_PASS)
> >  			goto construct_skb;
> >  		total_rx_bytes += xdp_get_buff_len(xdp);
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > index 166413fc33f4..d0ab2c4c0c91 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > @@ -257,6 +257,18 @@ enum ice_rx_dtype {
> >  	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
> >  };
> >  
> > +struct ice_pkt_ctx {
> > +	const union ice_32b_rx_flex_desc *eop_desc;
> > +};
> > +
> > +struct ice_xdp_buff {
> > +	struct xdp_buff xdp_buff;
> > +	struct ice_pkt_ctx pkt_ctx;
> > +};
> > +
> > +/* Required for compatibility with xdp_buffs from xsk_pool */
> > +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
> > +
> >  /* indices into GLINT_ITR registers */
> >  #define ICE_RX_ITR	ICE_IDX_ITR0
> >  #define ICE_TX_ITR	ICE_IDX_ITR1
> > @@ -298,7 +310,6 @@ enum ice_dynamic_itr {
> >  /* descriptor ring, associated with a VSI */
> >  struct ice_rx_ring {
> >  	/* CL1 - 1st cacheline starts here */
> > -	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
> >  	void *desc;			/* Descriptor ring memory */
> >  	struct device *dev;		/* Used for DMA mapping */
> >  	struct net_device *netdev;	/* netdev ring maps to */
> > @@ -310,12 +321,19 @@ struct ice_rx_ring {
> >  	u16 count;			/* Number of descriptors */
> >  	u16 reg_idx;			/* HW register index of the ring */
> >  	u16 next_to_alloc;
> > -	/* CL2 - 2nd cacheline starts here */
> > +
> >  	union {
> >  		struct ice_rx_buf *rx_buf;
> >  		struct xdp_buff **xdp_buf;
> >  	};
> > -	struct xdp_buff xdp;
> > +	/* CL2 - 2nd cacheline starts here */
> > +	union {
> > +		struct ice_xdp_buff xdp_ext;
> > +		struct {
> > +			struct xdp_buff xdp;
> > +			struct ice_pkt_ctx pkt_ctx;
> > +		};
> > +	};
> >  	/* CL3 - 3rd cacheline starts here */
> >  	struct bpf_prog *xdp_prog;
> >  	u16 rx_offset;
> > @@ -325,6 +343,8 @@ struct ice_rx_ring {
> >  	u16 next_to_clean;
> >  	u16 first_desc;
> >  
> > +	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
> > +
> >  	/* stats structs */
> >  	struct ice_ring_stats *ring_stats;
> >  
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > index e1d49e1235b3..145883eec129 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > @@ -151,4 +151,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
> >  		       struct sk_buff *skb);
> >  void
> >  ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
> > +
> > +static inline void
> > +ice_xdp_meta_set_desc(struct xdp_buff *xdp,
> > +		      union ice_32b_rx_flex_desc *eop_desc)
> > +{
> > +	struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
> > +						    xdp_buff);
> > +
> > +	xdp_ext->pkt_ctx.eop_desc = eop_desc;
> > +}
> >  #endif /* !_ICE_TXRX_LIB_H_ */
> > -- 
> > 2.41.0
> >
Fijalkowski, Maciej Sept. 5, 2023, 5:53 p.m. UTC | #3
On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > In order to use XDP hints via kfuncs we need to put
> > > RX descriptor and ring pointers just next to xdp_buff.
> > > Same as in hints implementations in other drivers, we achieve
> > > this through putting xdp_buff into a child structure.
> > 
> > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > if i'm reading this right.
> >
> 
> ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could 
> replace pointer to xdp_buff, but not in reverse).
> 
> > > 
> > > Currently, xdp_buff is stored in the ring structure,
> > > so replace it with union that includes child structure.
> > > This way enough memory is available while existing XDP code
> > > remains isolated from hints.
> > > 
> > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > leaves 128 bits available in CL3 for packet context extensions.
> > 
> > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > CL anyway.
> >
> 
> It is to at least keep xdp_buff and descriptor pointer (used for every hint) in 
> a single CL, other fields are situational.

Right, something must be moved...still, would be good to see perf
before/after :)

> 
> > > 
> > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > ---
> > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > >   * @xdp_prog: XDP program to run
> > >   * @xdp_ring: ring to be used for XDP_TX action
> > >   * @rx_buf: Rx buffer to store the XDP action
> > > + * @eop_desc: Last descriptor in packet to read metadata from
> > >   *
> > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > >   */
> > >  static void
> > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > >  	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > -	    struct ice_rx_buf *rx_buf)
> > > +	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > >  {
> > >  	unsigned int ret = ICE_XDP_PASS;
> > >  	u32 act;
> > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > >  	if (!xdp_prog)
> > >  		goto exit;
> > >  
> > > +	ice_xdp_meta_set_desc(xdp, eop_desc);
> > 
> > I am currently not sure if for multi-buffer case HW repeats all the
> > necessary info within each descriptor for every frag? IOW shouldn't you be
> > using the ice_rx_ring::first_desc?
> > 
> > Would be good to test hints for mbuf case for sure.
> >
> 
> In the skb path, we take metadata from the last descriptor only, so this should 
> be fine. Really worth testing with mbuf though.

Ok, thanks!
Larysa Zaremba Sept. 7, 2023, 2:21 p.m. UTC | #4
On Tue, Sep 05, 2023 at 07:53:03PM +0200, Maciej Fijalkowski wrote:
> On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> > On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > > In order to use XDP hints via kfuncs we need to put
> > > > RX descriptor and ring pointers just next to xdp_buff.
> > > > Same as in hints implementations in other drivers, we achieve
> > > > this through putting xdp_buff into a child structure.
> > > 
> > > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > > if i'm reading this right.
> > >
> > 
> > ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could 
> > replace pointer to xdp_buff, but not in reverse).
> > 
> > > > 
> > > > Currently, xdp_buff is stored in the ring structure,
> > > > so replace it with union that includes child structure.
> > > > This way enough memory is available while existing XDP code
> > > > remains isolated from hints.
> > > > 
> > > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > > leaves 128 bits available in CL3 for packet context extensions.
> > > 
> > > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > > CL anyway.
> > >
> > 
> > It is to at least keep xdp_buff and descriptor pointer (used for every hint) in 
> > a single CL, other fields are situational.
> 
> Right, something must be moved...still, would be good to see perf
> before/after :)
> 
> > 
> > > > 
> > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > ---
> > > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > > >   * @xdp_prog: XDP program to run
> > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > >   * @rx_buf: Rx buffer to store the XDP action
> > > > + * @eop_desc: Last descriptor in packet to read metadata from
> > > >   *
> > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > >   */
> > > >  static void
> > > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > >  	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > -	    struct ice_rx_buf *rx_buf)
> > > > +	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > > >  {
> > > >  	unsigned int ret = ICE_XDP_PASS;
> > > >  	u32 act;
> > > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > >  	if (!xdp_prog)
> > > >  		goto exit;
> > > >  
> > > > +	ice_xdp_meta_set_desc(xdp, eop_desc);
> > > 
> > > I am currently not sure if for multi-buffer case HW repeats all the
> > > necessary info within each descriptor for every frag? IOW shouldn't you be
> > > using the ice_rx_ring::first_desc?
> > > 
> > > Would be good to test hints for mbuf case for sure.
> > >
> > 
> > In the skb path, we take metadata from the last descriptor only, so this should 
> > be fine. Really worth testing with mbuf though.

I retract my promise to test this with mbuf, as for now hints and mbuf are not 
supposed to go together [0].

Making sure they can co-exist peacefully can be a topic for another series.
For now I can just say with high confidence that in case of multi-buffer 
frames, we do have all the supported metadata in the EoP descriptor.

[0] https://elixir.bootlin.com/linux/v6.5.2/source/kernel/bpf/offload.c#L234

> 
> Ok, thanks!
>
Stanislav Fomichev Sept. 7, 2023, 4:33 p.m. UTC | #5
On Thu, Sep 7, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
>
> On Tue, Sep 05, 2023 at 07:53:03PM +0200, Maciej Fijalkowski wrote:
> > On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> > > On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > > > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > > > In order to use XDP hints via kfuncs we need to put
> > > > > RX descriptor and ring pointers just next to xdp_buff.
> > > > > Same as in hints implementations in other drivers, we achieve
> > > > > this through putting xdp_buff into a child structure.
> > > >
> > > > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > > > if i'm reading this right.
> > > >
> > >
> > > ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could
> > > replace pointer to xdp_buff, but not in reverse).
> > >
> > > > >
> > > > > Currently, xdp_buff is stored in the ring structure,
> > > > > so replace it with union that includes child structure.
> > > > > This way enough memory is available while existing XDP code
> > > > > remains isolated from hints.
> > > > >
> > > > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > > > leaves 128 bits available in CL3 for packet context extensions.
> > > >
> > > > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > > > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > > > CL anyway.
> > > >
> > >
> > > It is to at least keep xdp_buff and descriptor pointer (used for every hint) in
> > > a single CL, other fields are situational.
> >
> > Right, something must be moved...still, would be good to see perf
> > before/after :)
> >
> > >
> > > > >
> > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > ---
> > > > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > > > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > > > >   * @xdp_prog: XDP program to run
> > > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > >   * @rx_buf: Rx buffer to store the XDP action
> > > > > + * @eop_desc: Last descriptor in packet to read metadata from
> > > > >   *
> > > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > > >   */
> > > > >  static void
> > > > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > >             struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > > -           struct ice_rx_buf *rx_buf)
> > > > > +           struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > > > >  {
> > > > >         unsigned int ret = ICE_XDP_PASS;
> > > > >         u32 act;
> > > > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > >         if (!xdp_prog)
> > > > >                 goto exit;
> > > > >
> > > > > +       ice_xdp_meta_set_desc(xdp, eop_desc);
> > > >
> > > > I am currently not sure if for multi-buffer case HW repeats all the
> > > > necessary info within each descriptor for every frag? IOW shouldn't you be
> > > > using the ice_rx_ring::first_desc?
> > > >
> > > > Would be good to test hints for mbuf case for sure.
> > > >
> > >
> > > In the skb path, we take metadata from the last descriptor only, so this should
> > > be fine. Really worth testing with mbuf though.
>
> I retract my promise to test this with mbuf, as for now hints and mbuf are not
> supposed to go together [0].

Hm, I don't think it's intentional. I don't see why mbuf and hints
can't coexist.
Does anything come to mind? Otherwise, you can change that mask to be
~(BPF_F_XDP_DEV_BOUND_ONLY|BPF_F_XDP_HAS_FRAGS) as part of the series
(or separately, up to you).

> Making sure they can co-exist peacefully can be a topic for another series.
> For now I just can just say with high confidence that in case of multi-buffer
> frames, we do have all the supported metadata in the EoP descriptor.
>
> [0] https://elixir.bootlin.com/linux/v6.5.2/source/kernel/bpf/offload.c#L234
>
> >
> > Ok, thanks!
> >
Fijalkowski, Maciej Sept. 7, 2023, 4:42 p.m. UTC | #6
On Thu, Sep 07, 2023 at 09:33:14AM -0700, Stanislav Fomichev wrote:
> On Thu, Sep 7, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
> >
> > On Tue, Sep 05, 2023 at 07:53:03PM +0200, Maciej Fijalkowski wrote:
> > > On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> > > > On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > > > > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > > > > In order to use XDP hints via kfuncs we need to put
> > > > > > RX descriptor and ring pointers just next to xdp_buff.
> > > > > > Same as in hints implementations in other drivers, we achieve
> > > > > > this through putting xdp_buff into a child structure.
> > > > >
> > > > > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > > > > if i'm reading this right.
> > > > >
> > > >
> > > > ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could
> > > > replace pointer to xdp_buff, but not in reverse).
> > > >
> > > > > >
> > > > > > Currently, xdp_buff is stored in the ring structure,
> > > > > > so replace it with union that includes child structure.
> > > > > > This way enough memory is available while existing XDP code
> > > > > > remains isolated from hints.
> > > > > >
> > > > > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > > > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > > > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > > > > leaves 128 bits available in CL3 for packet context extensions.
> > > > >
> > > > > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > > > > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > > > > CL anyway.
> > > > >
> > > >
> > > > It is to at least keep xdp_buff and descriptor pointer (used for every hint) in
> > > > a single CL, other fields are situational.
> > >
> > > Right, something must be moved...still, would be good to see perf
> > > before/after :)
> > >
> > > >
> > > > > >
> > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > ---
> > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > > > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > > > > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > > > > >   * @xdp_prog: XDP program to run
> > > > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > > >   * @rx_buf: Rx buffer to store the XDP action
> > > > > > + * @eop_desc: Last descriptor in packet to read metadata from
> > > > > >   *
> > > > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > > > >   */
> > > > > >  static void
> > > > > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > >             struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > > > -           struct ice_rx_buf *rx_buf)
> > > > > > +           struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > > > > >  {
> > > > > >         unsigned int ret = ICE_XDP_PASS;
> > > > > >         u32 act;
> > > > > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > >         if (!xdp_prog)
> > > > > >                 goto exit;
> > > > > >
> > > > > > +       ice_xdp_meta_set_desc(xdp, eop_desc);
> > > > >
> > > > > I am currently not sure if for multi-buffer case HW repeats all the
> > > > > necessary info within each descriptor for every frag? IOW shouldn't you be
> > > > > using the ice_rx_ring::first_desc?
> > > > >
> > > > > Would be good to test hints for mbuf case for sure.
> > > > >
> > > >
> > > > In the skb path, we take metadata from the last descriptor only, so this should
> > > > be fine. Really worth testing with mbuf though.
> >
> > I retract my promise to test this with mbuf, as for now hints and mbuf are not
> > supposed to go together [0].
> 
> Hm, I don't think it's intentional. I don't see why mbuf and hints
> can't coexist.

They should coexist; XDP mbuf support is an integral part of the driver, as
we know :)

> Anything pops into your mind? Otherwise, can change that mask to be
> ~(BPF_F_XDP_DEV_BOUND_ONLY|BPF_F_XDP_HAS_FRAGS) as part of the series
> (or separately, up to you).

+1

> 
> > Making sure they can co-exist peacefully can be a topic for another series.
> > For now I just can just say with high confidence that in case of multi-buffer
> > frames, we do have all the supported metadata in the EoP descriptor.
> >
> > [0] https://elixir.bootlin.com/linux/v6.5.2/source/kernel/bpf/offload.c#L234
> >
> > >
> > > Ok, thanks!
> > >
>
Fijalkowski, Maciej Sept. 7, 2023, 4:43 p.m. UTC | #7
On Thu, Sep 07, 2023 at 06:42:33PM +0200, Maciej Fijalkowski wrote:
> On Thu, Sep 07, 2023 at 09:33:14AM -0700, Stanislav Fomichev wrote:
> > On Thu, Sep 7, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
> > >
> > > On Tue, Sep 05, 2023 at 07:53:03PM +0200, Maciej Fijalkowski wrote:
> > > > On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> > > > > On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > > > > > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > > > > > In order to use XDP hints via kfuncs we need to put
> > > > > > > RX descriptor and ring pointers just next to xdp_buff.
> > > > > > > Same as in hints implementations in other drivers, we achieve
> > > > > > > this through putting xdp_buff into a child structure.
> > > > > >
> > > > > > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > > > > > if i'm reading this right.
> > > > > >
> > > > >
> > > > > ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could
> > > > > replace pointer to xdp_buff, but not in reverse).
> > > > >
> > > > > > >
> > > > > > > Currently, xdp_buff is stored in the ring structure,
> > > > > > > so replace it with union that includes child structure.
> > > > > > > This way enough memory is available while existing XDP code
> > > > > > > remains isolated from hints.
> > > > > > >
> > > > > > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > > > > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > > > > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > > > > > leaves 128 bits available in CL3 for packet context extensions.
> > > > > >
> > > > > > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > > > > > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > > > > > CL anyway.
> > > > > >
> > > > >
> > > > > It is to at least keep xdp_buff and descriptor pointer (used for every hint) in
> > > > > a single CL, other fields are situational.
> > > >
> > > > Right, something must be moved...still, would be good to see perf
> > > > before/after :)
> > > >
> > > > >
> > > > > > >
> > > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > > ---
> > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > > > > > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > > > > > >   * @xdp_prog: XDP program to run
> > > > > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > > > >   * @rx_buf: Rx buffer to store the XDP action
> > > > > > > + * @eop_desc: Last descriptor in packet to read metadata from
> > > > > > >   *
> > > > > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > > > > >   */
> > > > > > >  static void
> > > > > > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > > >             struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > > > > -           struct ice_rx_buf *rx_buf)
> > > > > > > +           struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > > > > > >  {
> > > > > > >         unsigned int ret = ICE_XDP_PASS;
> > > > > > >         u32 act;
> > > > > > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > > >         if (!xdp_prog)
> > > > > > >                 goto exit;
> > > > > > >
> > > > > > > +       ice_xdp_meta_set_desc(xdp, eop_desc);
> > > > > >
> > > > > > I am currently not sure if for multi-buffer case HW repeats all the
> > > > > > necessary info within each descriptor for every frag? IOW shouldn't you be
> > > > > > using the ice_rx_ring::first_desc?
> > > > > >
> > > > > > Would be good to test hints for mbuf case for sure.
> > > > > >
> > > > >
> > > > > In the skb path, we take metadata from the last descriptor only, so this should
> > > > > be fine. Really worth testing with mbuf though.
> > >
> > > I retract my promise to test this with mbuf, as for now hints and mbuf are not
> > > supposed to go together [0].
> > 
> > Hm, I don't think it's intentional. I don't see why mbuf and hints
> > can't coexist.
> 
> They should coexist, xdp mbuf support is an integral part of driver as we
> know:)
> 
> > Anything pops into your mind? Otherwise, can change that mask to be
> > ~(BPF_F_XDP_DEV_BOUND_ONLY|BPF_F_XDP_HAS_FRAGS) as part of the series
> > (or separately, up to you).
> 
> +1

IMHO that should be a standalone patch.

> 
> > 
> > > Making sure they can co-exist peacefully can be a topic for another series.
> > > For now I just can just say with high confidence that in case of multi-buffer
> > > frames, we do have all the supported metadata in the EoP descriptor.
> > >
> > > [0] https://elixir.bootlin.com/linux/v6.5.2/source/kernel/bpf/offload.c#L234
> > >
> > > >
> > > > Ok, thanks!
> > > >
> >
Larysa Zaremba Sept. 13, 2023, 3:40 p.m. UTC | #8
On Thu, Sep 07, 2023 at 06:43:58PM +0200, Maciej Fijalkowski wrote:
> On Thu, Sep 07, 2023 at 06:42:33PM +0200, Maciej Fijalkowski wrote:
> > On Thu, Sep 07, 2023 at 09:33:14AM -0700, Stanislav Fomichev wrote:
> > > On Thu, Sep 7, 2023 at 7:27 AM Larysa Zaremba <larysa.zaremba@intel.com> wrote:
> > > >
> > > > On Tue, Sep 05, 2023 at 07:53:03PM +0200, Maciej Fijalkowski wrote:
> > > > > On Mon, Sep 04, 2023 at 08:11:09PM +0200, Larysa Zaremba wrote:
> > > > > > On Mon, Sep 04, 2023 at 05:32:14PM +0200, Maciej Fijalkowski wrote:
> > > > > > > On Thu, Aug 24, 2023 at 09:26:44PM +0200, Larysa Zaremba wrote:
> > > > > > > > In order to use XDP hints via kfuncs we need to put
> > > > > > > > RX descriptor and ring pointers just next to xdp_buff.
> > > > > > > > Same as in hints implementations in other drivers, we achieve
> > > > > > > > this through putting xdp_buff into a child structure.
> > > > > > >
> > > > > > > Don't you mean a parent struct? xdp_buff will be 'child' of ice_xdp_buff
> > > > > > > if i'm reading this right.
> > > > > > >
> > > > > >
> > > > > > ice_xdp_buff is a child in terms of inheritance (pointer to ice_xdp_buff could
> > > > > > replace pointer to xdp_buff, but not in reverse).
> > > > > >
> > > > > > > >
> > > > > > > > Currently, xdp_buff is stored in the ring structure,
> > > > > > > > so replace it with union that includes child structure.
> > > > > > > > This way enough memory is available while existing XDP code
> > > > > > > > remains isolated from hints.
> > > > > > > >
> > > > > > > > Minimum size of the new child structure (ice_xdp_buff) is exactly
> > > > > > > > 64 bytes (single cache line). To place it at the start of a cache line,
> > > > > > > > move 'next' field from CL1 to CL3, as it isn't used often. This still
> > > > > > > > leaves 128 bits available in CL3 for packet context extensions.
> > > > > > >
> > > > > > > I believe ice_xdp_buff will be beefed up in later patches, so what is the
> > > > > > > point of moving 'next' ? We won't be able to keep ice_xdp_buff in a single
> > > > > > > CL anyway.
> > > > > > >
> > > > > >
> > > > > > It is to at least keep xdp_buff and descriptor pointer (used for every hint) in
> > > > > > a single CL, other fields are situational.
> > > > >
> > > > > Right, something must be moved...still, would be good to see perf
> > > > > before/after :)
> > > > >
> > > > > >
> > > > > > > >
> > > > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > > > ---
> > > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.c     |  7 +++--
> > > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx.h     | 26 ++++++++++++++++---
> > > > > > > >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 +++++++
> > > > > > > >  3 files changed, 38 insertions(+), 5 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > > index 40f2f6dabb81..4e6546d9cf85 100644
> > > > > > > > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > > > > > > > @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
> > > > > > > >   * @xdp_prog: XDP program to run
> > > > > > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > > > > >   * @rx_buf: Rx buffer to store the XDP action
> > > > > > > > + * @eop_desc: Last descriptor in packet to read metadata from
> > > > > > > >   *
> > > > > > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > > > > > >   */
> > > > > > > >  static void
> > > > > > > >  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > > > >             struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > > > > > -           struct ice_rx_buf *rx_buf)
> > > > > > > > +           struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
> > > > > > > >  {
> > > > > > > >         unsigned int ret = ICE_XDP_PASS;
> > > > > > > >         u32 act;
> > > > > > > > @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > > > >         if (!xdp_prog)
> > > > > > > >                 goto exit;
> > > > > > > >
> > > > > > > > +       ice_xdp_meta_set_desc(xdp, eop_desc);
> > > > > > >
> > > > > > > I am currently not sure if for multi-buffer case HW repeats all the
> > > > > > > necessary info within each descriptor for every frag? IOW shouldn't you be
> > > > > > > using the ice_rx_ring::first_desc?
> > > > > > >
> > > > > > > Would be good to test hints for mbuf case for sure.
> > > > > > >
> > > > > >
> > > > > > In the skb path, we take metadata from the last descriptor only, so this should
> > > > > > be fine. Really worth testing with mbuf though.
> > > >
> > > > I retract my promise to test this with mbuf, as for now hints and mbuf are not
> > > > supposed to go together [0].
> > > 
> > > Hm, I don't think it's intentional. I don't see why mbuf and hints
> > > can't coexist.
> > 
> > They should coexist, xdp mbuf support is an integral part of driver as we
> > know:)
> > 
> > > Anything pops into your mind? Otherwise, can change that mask to be
> > > ~(BPF_F_XDP_DEV_BOUND_ONLY|BPF_F_XDP_HAS_FRAGS) as part of the series
> > > (or separately, up to you).
> > 
> > +1
> 
> IMHO that should be a standalone patch.
>

Sorry for not answering; I was stuck in testing and debugging and wanted to
come back with a definitive answer. Fortunately, the problems were not caused
by hints and mbuf clashing on some fundamental level. Everything works now, so
I will send the patch that allows combining them tomorrow.

> > 
> > > 
> > > > Making sure they can co-exist peacefully can be a topic for another series.
> > > > For now I just can just say with high confidence that in case of multi-buffer
> > > > frames, we do have all the supported metadata in the EoP descriptor.
> > > >
> > > > [0] https://elixir.bootlin.com/linux/v6.5.2/source/kernel/bpf/offload.c#L234
> > > >
> > > > >
> > > > > Ok, thanks!
> > > > >
> > >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 40f2f6dabb81..4e6546d9cf85 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -557,13 +557,14 @@  ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
  * @xdp_prog: XDP program to run
  * @xdp_ring: ring to be used for XDP_TX action
  * @rx_buf: Rx buffer to store the XDP action
+ * @eop_desc: Last descriptor in packet to read metadata from
  *
  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
  */
 static void
 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
-	    struct ice_rx_buf *rx_buf)
+	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
 {
 	unsigned int ret = ICE_XDP_PASS;
 	u32 act;
@@ -571,6 +572,8 @@  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	if (!xdp_prog)
 		goto exit;
 
+	ice_xdp_meta_set_desc(xdp, eop_desc);
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
@@ -1240,7 +1243,7 @@  int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		if (ice_is_non_eop(rx_ring, rx_desc))
 			continue;
 
-		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
+		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
 		if (rx_buf->act == ICE_XDP_PASS)
 			goto construct_skb;
 		total_rx_bytes += xdp_get_buff_len(xdp);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 166413fc33f4..d0ab2c4c0c91 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -257,6 +257,18 @@  enum ice_rx_dtype {
 	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
 };
 
+struct ice_pkt_ctx {
+	const union ice_32b_rx_flex_desc *eop_desc;
+};
+
+struct ice_xdp_buff {
+	struct xdp_buff xdp_buff;
+	struct ice_pkt_ctx pkt_ctx;
+};
+
+/* Required for compatibility with xdp_buffs from xsk_pool */
+static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
+
 /* indices into GLINT_ITR registers */
 #define ICE_RX_ITR	ICE_IDX_ITR0
 #define ICE_TX_ITR	ICE_IDX_ITR1
@@ -298,7 +310,6 @@  enum ice_dynamic_itr {
 /* descriptor ring, associated with a VSI */
 struct ice_rx_ring {
 	/* CL1 - 1st cacheline starts here */
-	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	struct net_device *netdev;	/* netdev ring maps to */
@@ -310,12 +321,19 @@  struct ice_rx_ring {
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
 	u16 next_to_alloc;
-	/* CL2 - 2nd cacheline starts here */
+
 	union {
 		struct ice_rx_buf *rx_buf;
 		struct xdp_buff **xdp_buf;
 	};
-	struct xdp_buff xdp;
+	/* CL2 - 2nd cacheline starts here */
+	union {
+		struct ice_xdp_buff xdp_ext;
+		struct {
+			struct xdp_buff xdp;
+			struct ice_pkt_ctx pkt_ctx;
+		};
+	};
 	/* CL3 - 3rd cacheline starts here */
 	struct bpf_prog *xdp_prog;
 	u16 rx_offset;
@@ -325,6 +343,8 @@  struct ice_rx_ring {
 	u16 next_to_clean;
 	u16 first_desc;
 
+	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
+
 	/* stats structs */
 	struct ice_ring_stats *ring_stats;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index e1d49e1235b3..145883eec129 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -151,4 +151,14 @@  ice_process_skb_fields(struct ice_rx_ring *rx_ring,
 		       struct sk_buff *skb);
 void
 ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+
+static inline void
+ice_xdp_meta_set_desc(struct xdp_buff *xdp,
+		      union ice_32b_rx_flex_desc *eop_desc)
+{
+	struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
+						    xdp_buff);
+
+	xdp_ext->pkt_ctx.eop_desc = eop_desc;
+}
 #endif /* !_ICE_TXRX_LIB_H_ */