
[bpf-next,v6,07/18] ice: Support XDP hints in AF_XDP ZC mode

Message ID 20231012170524.21085-8-larysa.zaremba@intel.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series XDP metadata via kfuncs for ice + VLAN hint

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1340 this patch: 1340
netdev/cc_maintainers warning 6 maintainers not CCed: intel-wired-lan@lists.osuosl.org pabeni@redhat.com edumazet@google.com jesse.brandeburg@intel.com anthony.l.nguyen@intel.com davem@davemloft.net
netdev/build_clang success Errors and warnings before: 1363 this patch: 1363
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1363 this patch: 1363
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 61 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 fail Logs for veristat

Commit Message

Larysa Zaremba Oct. 12, 2023, 5:05 p.m. UTC
In AF_XDP ZC, the xdp_buff is not stored on the ring;
instead, it is provided by xsk_buff_pool.
Space for metadata sources right after such buffers was already reserved
in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
This makes the implementation rather straightforward.

Update AF_XDP ZC packet processing to support XDP hints.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)
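
For reference, a minimal sketch of the layout this cast relies on
(assumed from commit 94ecc5ca4dbf and this series; fields are simplified
and the exact XSK_PRIV_MAX value is not verified here):

struct xdp_buff_xsk {
	struct xdp_buff xdp;		/* must stay first */
	u8 cb[XSK_PRIV_MAX];		/* driver-private scratch area */
	/* ... pool bookkeeping fields ... */
};

/* ice overlays its private fields onto the cb[] area: */
struct ice_xdp_buff {
	struct xdp_buff xdp_buff;
	const union ice_32b_rx_flex_desc *eop_desc;
	const struct ice_pkt_ctx *pkt_ctx;	/* assumed a pointer here */
};

/* XSK_CHECK_PRIV_TYPE() is a compile-time guard along the lines of:
 *	BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb));
 * so the (struct ice_xdp_buff *) cast never reaches past cb[].
 */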

Comments

Fijalkowski, Maciej Oct. 17, 2023, 4:13 p.m. UTC | #1
On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> In AF_XDP ZC, xdp_buff is not stored on ring,
> instead it is provided by xsk_buff_pool.
> Space for metadata sources right after such buffers was already reserved
> in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> This makes the implementation rather straightforward.
> 
> Update AF_XDP ZC packet processing to support XDP hints.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> index ef778b8e6d1b..6ca620b2fbdd 100644
> --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
>  	return ICE_XDP_CONSUMED;
>  }
>  
> +/**
> + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> + * @xdp: xdp_buff used as input to the XDP program
> + * @eop_desc: End of packet descriptor
> + * @rx_ring: Rx ring with packet context
> + *
> + * In regular XDP, xdp_buff is placed inside the ring structure,
> + * just before the packet context, so the latter can be accessed
> + * with xdp_buff address only at all times, but in ZC mode,
> + * xdp_buffs come from the pool, so we need to reinitialize
> + * context for every packet.
> + *
> + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> + * right after xdp_buff, for our private use.
> + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> + */
> +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> +				   union ice_32b_rx_flex_desc *eop_desc,
> +				   struct ice_rx_ring *rx_ring)
> +{
> +	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> +	((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;

I will be thinking out loud over here, but this could be set in
ice_fill_rx_descs(), while grabbing xdp_buffs from the xsk_pool, which
should minimize the performance overhead.

But then again, you address that with a static branch in a later patch.

OTOH, I was thinking that we could come up with an xsk_buff_pool API that
would let drivers assign this at setup time, similar to what is being done
with DMA mappings.

Magnus, do you think it is worth the hassle? Thoughts?

Or should we advise any other driver that supports hints to mimic the
static branch solution?
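
A rough sketch of what the ice_fill_rx_descs() variant could look like
(hypothetical; the extra pkt_ctx parameter is an assumption and error
handling is omitted):

static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
			     union ice_rx_desc *rx_desc, u16 count,
			     const struct ice_pkt_ctx *pkt_ctx)
{
	dma_addr_t dma;
	u16 buffs;
	int i;

	buffs = xsk_buff_alloc_batch(pool, xdp, count);
	for (i = 0; i < buffs; i++) {
		dma = xsk_buff_xdp_get_dma(*xdp);
		rx_desc->read.pkt_addr = cpu_to_le64(dma);
		rx_desc->wb.status_error0 = 0;

		/* set once per refill instead of once per processed packet */
		((struct ice_xdp_buff *)*xdp)->pkt_ctx = pkt_ctx;

		rx_desc++;
		xdp++;
	}

	return buffs;
}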

> +	ice_xdp_meta_set_desc(xdp, eop_desc);
> +}
> +
>  /**
>   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
>   * @rx_ring: Rx ring
>   * @xdp: xdp_buff used as input to the XDP program
>   * @xdp_prog: XDP program to run
>   * @xdp_ring: ring to be used for XDP_TX action
> + * @rx_desc: packet descriptor
>   *
>   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
>   */
>  static int
>  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> -	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> +	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> +	       union ice_32b_rx_flex_desc *rx_desc)
>  {
>  	int err, result = ICE_XDP_PASS;
>  	u32 act;
>  
> +	ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
>  	act = bpf_prog_run_xdp(xdp_prog, xdp);
>  
>  	if (likely(act == XDP_REDIRECT)) {
> @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
>  		if (ice_is_non_eop(rx_ring, rx_desc))
>  			continue;
>  
> -		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> +		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> +					 rx_desc);
>  		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
>  			xdp_xmit |= xdp_res;
>  		} else if (xdp_res == ICE_XDP_EXIT) {
> -- 
> 2.41.0
>
Magnus Karlsson Oct. 17, 2023, 4:37 p.m. UTC | #2
On Tue, 17 Oct 2023 at 18:13, Maciej Fijalkowski
<maciej.fijalkowski@intel.com> wrote:
>
> On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> > In AF_XDP ZC, xdp_buff is not stored on ring,
> > instead it is provided by xsk_buff_pool.
> > Space for metadata sources right after such buffers was already reserved
> > in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> > This makes the implementation rather straightforward.
> >
> > Update AF_XDP ZC packet processing to support XDP hints.
> >
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
> >  1 file changed, 32 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > index ef778b8e6d1b..6ca620b2fbdd 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
> >       return ICE_XDP_CONSUMED;
> >  }
> >
> > +/**
> > + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> > + * @xdp: xdp_buff used as input to the XDP program
> > + * @eop_desc: End of packet descriptor
> > + * @rx_ring: Rx ring with packet context
> > + *
> > + * In regular XDP, xdp_buff is placed inside the ring structure,
> > + * just before the packet context, so the latter can be accessed
> > + * with xdp_buff address only at all times, but in ZC mode,
> > + * xdp_buffs come from the pool, so we need to reinitialize
> > + * context for every packet.
> > + *
> > + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> > + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> > + * right after xdp_buff, for our private use.
> > + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> > + */
> > +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> > +                                union ice_32b_rx_flex_desc *eop_desc,
> > +                                struct ice_rx_ring *rx_ring)
> > +{
> > +     XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> > +     ((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
>
> I will be loud thinking over here, but this could be set in
> ice_fill_rx_descs(), while grabbing xdp_buffs from xsk_pool, should
> minimize the performance overhead.
>
> But then again you address that with static branch in later patch.
>
> OTOH, I was thinking that we could come with xsk_buff_pool API that would
> let drivers assign this at setup time. Similar what is being done with dma
> mappings.
>
> Magnus, do you think it is worth the hassle? Thoughts?

I would measure the overhead of the current assignment, and if it is
significant (incurs a cache miss, for example), then why not try out
your idea. It is usually good not to have to touch things when not needed.

> Or should we advise any other driver that support hints to mimic static
> branch solution?
>
> > +     ice_xdp_meta_set_desc(xdp, eop_desc);
> > +}
> > +
> >  /**
> >   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
> >   * @rx_ring: Rx ring
> >   * @xdp: xdp_buff used as input to the XDP program
> >   * @xdp_prog: XDP program to run
> >   * @xdp_ring: ring to be used for XDP_TX action
> > + * @rx_desc: packet descriptor
> >   *
> >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> >   */
> >  static int
> >  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > -            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> > +            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > +            union ice_32b_rx_flex_desc *rx_desc)
> >  {
> >       int err, result = ICE_XDP_PASS;
> >       u32 act;
> >
> > +     ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
> >       act = bpf_prog_run_xdp(xdp_prog, xdp);
> >
> >       if (likely(act == XDP_REDIRECT)) {
> > @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> >               if (ice_is_non_eop(rx_ring, rx_desc))
> >                       continue;
> >
> > -             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> > +             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> > +                                      rx_desc);
> >               if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
> >                       xdp_xmit |= xdp_res;
> >               } else if (xdp_res == ICE_XDP_EXIT) {
> > --
> > 2.41.0
> >
Fijalkowski, Maciej Oct. 17, 2023, 4:45 p.m. UTC | #3
On Tue, Oct 17, 2023 at 06:37:07PM +0200, Magnus Karlsson wrote:
> On Tue, 17 Oct 2023 at 18:13, Maciej Fijalkowski
> <maciej.fijalkowski@intel.com> wrote:
> >
> > On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> > > In AF_XDP ZC, xdp_buff is not stored on ring,
> > > instead it is provided by xsk_buff_pool.
> > > Space for metadata sources right after such buffers was already reserved
> > > in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> > > This makes the implementation rather straightforward.
> > >
> > > Update AF_XDP ZC packet processing to support XDP hints.
> > >
> > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > ---
> > >  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
> > >  1 file changed, 32 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > index ef778b8e6d1b..6ca620b2fbdd 100644
> > > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
> > >       return ICE_XDP_CONSUMED;
> > >  }
> > >
> > > +/**
> > > + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> > > + * @xdp: xdp_buff used as input to the XDP program
> > > + * @eop_desc: End of packet descriptor
> > > + * @rx_ring: Rx ring with packet context
> > > + *
> > > + * In regular XDP, xdp_buff is placed inside the ring structure,
> > > + * just before the packet context, so the latter can be accessed
> > > + * with xdp_buff address only at all times, but in ZC mode,
> > > + * xdp_buffs come from the pool, so we need to reinitialize
> > > + * context for every packet.
> > > + *
> > > + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> > > + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> > > + * right after xdp_buff, for our private use.
> > > + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> > > + */
> > > +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> > > +                                union ice_32b_rx_flex_desc *eop_desc,
> > > +                                struct ice_rx_ring *rx_ring)
> > > +{
> > > +     XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> > > +     ((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
> >
> > I will be loud thinking over here, but this could be set in
> > ice_fill_rx_descs(), while grabbing xdp_buffs from xsk_pool, should
> > minimize the performance overhead.
> >
> > But then again you address that with static branch in later patch.
> >
> > OTOH, I was thinking that we could come with xsk_buff_pool API that would
> > let drivers assign this at setup time. Similar what is being done with dma
> > mappings.
> >
> > Magnus, do you think it is worth the hassle? Thoughts?
> 
> I would measure the overhead of the current assignment and if it is
> significant (incurs a cache miss for example), then why not try out
> your idea. Usually good not to have to touch things when not needed.

Larysa measured that because I asked for it previously, and the impact was
around 6%. Then look at patch 11/18 to see how this was addressed.

Other ZC drivers didn't report the impact, but I am rather sure they were
also affected. So I was thinking whether we should have some generic
solution within the pool or every ZC driver should handle that on its own.
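
The shape of that static branch solution, sketched (the key name and the
exact placement are assumptions based on this discussion, not a quote of
patch 11/18):

DEFINE_STATIC_KEY_FALSE(ice_xdp_meta_key);

static int
ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
	       union ice_32b_rx_flex_desc *rx_desc)
{
	/* pay the per-packet context setup cost only when an attached
	 * XDP program can actually request hints
	 */
	if (static_branch_unlikely(&ice_xdp_meta_key))
		ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);

	/* ... bpf_prog_run_xdp() and action handling as in this patch ... */
}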

> 
> > Or should we advise any other driver that support hints to mimic static
> > branch solution?
> >
> > > +     ice_xdp_meta_set_desc(xdp, eop_desc);
> > > +}
> > > +
> > >  /**
> > >   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
> > >   * @rx_ring: Rx ring
> > >   * @xdp: xdp_buff used as input to the XDP program
> > >   * @xdp_prog: XDP program to run
> > >   * @xdp_ring: ring to be used for XDP_TX action
> > > + * @rx_desc: packet descriptor
> > >   *
> > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > >   */
> > >  static int
> > >  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > -            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> > > +            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > +            union ice_32b_rx_flex_desc *rx_desc)
> > >  {
> > >       int err, result = ICE_XDP_PASS;
> > >       u32 act;
> > >
> > > +     ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
> > >       act = bpf_prog_run_xdp(xdp_prog, xdp);
> > >
> > >       if (likely(act == XDP_REDIRECT)) {
> > > @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> > >               if (ice_is_non_eop(rx_ring, rx_desc))
> > >                       continue;
> > >
> > > -             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> > > +             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> > > +                                      rx_desc);
> > >               if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
> > >                       xdp_xmit |= xdp_res;
> > >               } else if (xdp_res == ICE_XDP_EXIT) {
> > > --
> > > 2.41.0
> > >
Larysa Zaremba Oct. 17, 2023, 5:03 p.m. UTC | #4
On Tue, Oct 17, 2023 at 06:45:02PM +0200, Maciej Fijalkowski wrote:
> On Tue, Oct 17, 2023 at 06:37:07PM +0200, Magnus Karlsson wrote:
> > On Tue, 17 Oct 2023 at 18:13, Maciej Fijalkowski
> > <maciej.fijalkowski@intel.com> wrote:
> > >
> > > On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> > > > In AF_XDP ZC, xdp_buff is not stored on ring,
> > > > instead it is provided by xsk_buff_pool.
> > > > Space for metadata sources right after such buffers was already reserved
> > > > in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> > > > This makes the implementation rather straightforward.
> > > >
> > > > Update AF_XDP ZC packet processing to support XDP hints.
> > > >
> > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > ---
> > > >  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
> > > >  1 file changed, 32 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > index ef778b8e6d1b..6ca620b2fbdd 100644
> > > > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
> > > >       return ICE_XDP_CONSUMED;
> > > >  }
> > > >
> > > > +/**
> > > > + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> > > > + * @xdp: xdp_buff used as input to the XDP program
> > > > + * @eop_desc: End of packet descriptor
> > > > + * @rx_ring: Rx ring with packet context
> > > > + *
> > > > + * In regular XDP, xdp_buff is placed inside the ring structure,
> > > > + * just before the packet context, so the latter can be accessed
> > > > + * with xdp_buff address only at all times, but in ZC mode,
> > > > + * xdp_buffs come from the pool, so we need to reinitialize
> > > > + * context for every packet.
> > > > + *
> > > > + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> > > > + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> > > > + * right after xdp_buff, for our private use.
> > > > + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> > > > + */
> > > > +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> > > > +                                union ice_32b_rx_flex_desc *eop_desc,
> > > > +                                struct ice_rx_ring *rx_ring)
> > > > +{
> > > > +     XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> > > > +     ((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
> > >
> > > I will be loud thinking over here, but this could be set in
> > > ice_fill_rx_descs(), while grabbing xdp_buffs from xsk_pool, should
> > > minimize the performance overhead.

I am not sure about that. The packet context consists of:
* VLAN protocol
* cached time

Both of those can be updated without stopping traffic, so we cannot set this 
at setup time.
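
Roughly, as a sketch (field names assumed from this series):

struct ice_pkt_ctx {
	u64 cached_phctime;	/* cached PHC time, refreshed at runtime */
	__be16 vlan_proto;	/* VLAN protocol, can change at runtime */
};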

> > >
> > > But then again you address that with static branch in later patch.
> > >
> > > OTOH, I was thinking that we could come with xsk_buff_pool API that would
> > > let drivers assign this at setup time. Similar what is being done with dma
> > > mappings.
> > >
> > > Magnus, do you think it is worth the hassle? Thoughts?
> > 
> > I would measure the overhead of the current assignment and if it is
> > significant (incurs a cache miss for example), then why not try out
> > your idea. Usually good not to have to touch things when not needed.
> 
> Larysa measured that because I asked for that previously and impact was
> around 6%. Then look at patch 11/18 how this was addressed.
> 
> Other ZC drivers didn't report the impact but i am rather sure they were also
> affected. So i was thinking whether we should have some generic solution
> within pool or every ZC driver handles that on its own.
> 
> > 
> > > Or should we advise any other driver that support hints to mimic static
> > > branch solution?
> > >
> > > > +     ice_xdp_meta_set_desc(xdp, eop_desc);
> > > > +}
> > > > +
> > > >  /**
> > > >   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
> > > >   * @rx_ring: Rx ring
> > > >   * @xdp: xdp_buff used as input to the XDP program
> > > >   * @xdp_prog: XDP program to run
> > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > + * @rx_desc: packet descriptor
> > > >   *
> > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > >   */
> > > >  static int
> > > >  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > -            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> > > > +            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > +            union ice_32b_rx_flex_desc *rx_desc)
> > > >  {
> > > >       int err, result = ICE_XDP_PASS;
> > > >       u32 act;
> > > >
> > > > +     ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
> > > >       act = bpf_prog_run_xdp(xdp_prog, xdp);
> > > >
> > > >       if (likely(act == XDP_REDIRECT)) {
> > > > @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> > > >               if (ice_is_non_eop(rx_ring, rx_desc))
> > > >                       continue;
> > > >
> > > > -             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> > > > +             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> > > > +                                      rx_desc);
> > > >               if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
> > > >                       xdp_xmit |= xdp_res;
> > > >               } else if (xdp_res == ICE_XDP_EXIT) {
> > > > --
> > > > 2.41.0
> > > >
Fijalkowski, Maciej Oct. 18, 2023, 10:43 a.m. UTC | #5
On Tue, Oct 17, 2023 at 07:03:57PM +0200, Larysa Zaremba wrote:
> On Tue, Oct 17, 2023 at 06:45:02PM +0200, Maciej Fijalkowski wrote:
> > On Tue, Oct 17, 2023 at 06:37:07PM +0200, Magnus Karlsson wrote:
> > > On Tue, 17 Oct 2023 at 18:13, Maciej Fijalkowski
> > > <maciej.fijalkowski@intel.com> wrote:
> > > >
> > > > On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> > > > > In AF_XDP ZC, xdp_buff is not stored on ring,
> > > > > instead it is provided by xsk_buff_pool.
> > > > > Space for metadata sources right after such buffers was already reserved
> > > > > in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> > > > > This makes the implementation rather straightforward.
> > > > >
> > > > > Update AF_XDP ZC packet processing to support XDP hints.
> > > > >
> > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > ---
> > > > >  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
> > > > >  1 file changed, 32 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > > index ef778b8e6d1b..6ca620b2fbdd 100644
> > > > > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > > > > @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
> > > > >       return ICE_XDP_CONSUMED;
> > > > >  }
> > > > >
> > > > > +/**
> > > > > + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> > > > > + * @xdp: xdp_buff used as input to the XDP program
> > > > > + * @eop_desc: End of packet descriptor
> > > > > + * @rx_ring: Rx ring with packet context
> > > > > + *
> > > > > + * In regular XDP, xdp_buff is placed inside the ring structure,
> > > > > + * just before the packet context, so the latter can be accessed
> > > > > + * with xdp_buff address only at all times, but in ZC mode,
> > > > > + * xdp_buffs come from the pool, so we need to reinitialize
> > > > > + * context for every packet.
> > > > > + *
> > > > > + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> > > > > + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> > > > > + * right after xdp_buff, for our private use.
> > > > > + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> > > > > + */
> > > > > +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> > > > > +                                union ice_32b_rx_flex_desc *eop_desc,
> > > > > +                                struct ice_rx_ring *rx_ring)
> > > > > +{
> > > > > +     XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> > > > > +     ((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
> > > >
> > > > I will be loud thinking over here, but this could be set in
> > > > ice_fill_rx_descs(), while grabbing xdp_buffs from xsk_pool, should
> > > > minimize the performance overhead.
> 
> I am not sure about that. Packet context consists of:
> * VLAN protocol
> * cached time
> 
> Both of those can be updated without stopping traffic, so we cannot set this 
> at setup time.

I was referring to setting the pointer to pkt_ctx. Similarly, mlx5 sets
the pointer to rq during alloc, but the cqe ptr is set per packet.

Regardless, let us proceed with what you have and later on maybe address
what I was bringing up here.
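
As a sketch of that mlx5 split (field names assumed, simplified): the
per-ring pointer is set once at alloc time, and only the per-packet
descriptor pointer is touched in the hot path.

struct mlx5e_xdp_buff {
	struct xdp_buff xdp;
	struct mlx5_cqe64 *cqe;	/* per packet, set in the hot path */
	struct mlx5e_rq *rq;	/* per ring, set once at alloc time */
};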

> 
> > > >
> > > > But then again you address that with static branch in later patch.
> > > >
> > > > OTOH, I was thinking that we could come with xsk_buff_pool API that would
> > > > let drivers assign this at setup time. Similar what is being done with dma
> > > > mappings.
> > > >
> > > > Magnus, do you think it is worth the hassle? Thoughts?
> > > 
> > > I would measure the overhead of the current assignment and if it is
> > > significant (incurs a cache miss for example), then why not try out
> > > your idea. Usually good not to have to touch things when not needed.
> > 
> > Larysa measured that because I asked for that previously and impact was
> > around 6%. Then look at patch 11/18 how this was addressed.
> > 
> > Other ZC drivers didn't report the impact but i am rather sure they were also
> > affected. So i was thinking whether we should have some generic solution
> > within pool or every ZC driver handles that on its own.
> > 
> > > 
> > > > Or should we advise any other driver that support hints to mimic static
> > > > branch solution?
> > > >
> > > > > +     ice_xdp_meta_set_desc(xdp, eop_desc);
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
> > > > >   * @rx_ring: Rx ring
> > > > >   * @xdp: xdp_buff used as input to the XDP program
> > > > >   * @xdp_prog: XDP program to run
> > > > >   * @xdp_ring: ring to be used for XDP_TX action
> > > > > + * @rx_desc: packet descriptor
> > > > >   *
> > > > >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> > > > >   */
> > > > >  static int
> > > > >  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > > > > -            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> > > > > +            struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > > > > +            union ice_32b_rx_flex_desc *rx_desc)
> > > > >  {
> > > > >       int err, result = ICE_XDP_PASS;
> > > > >       u32 act;
> > > > >
> > > > > +     ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
> > > > >       act = bpf_prog_run_xdp(xdp_prog, xdp);
> > > > >
> > > > >       if (likely(act == XDP_REDIRECT)) {
> > > > > @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> > > > >               if (ice_is_non_eop(rx_ring, rx_desc))
> > > > >                       continue;
> > > > >
> > > > > -             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> > > > > +             xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> > > > > +                                      rx_desc);
> > > > >               if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
> > > > >                       xdp_xmit |= xdp_res;
> > > > >               } else if (xdp_res == ICE_XDP_EXIT) {
> > > > > --
> > > > > 2.41.0
> > > > >
Fijalkowski, Maciej Oct. 20, 2023, 3:29 p.m. UTC | #6
On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> In AF_XDP ZC, xdp_buff is not stored on ring,
> instead it is provided by xsk_buff_pool.
> Space for metadata sources right after such buffers was already reserved
> in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> This makes the implementation rather straightforward.
> 
> Update AF_XDP ZC packet processing to support XDP hints.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> index ef778b8e6d1b..6ca620b2fbdd 100644
> --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
>  	return ICE_XDP_CONSUMED;
>  }
>  
> +/**
> + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> + * @xdp: xdp_buff used as input to the XDP program
> + * @eop_desc: End of packet descriptor
> + * @rx_ring: Rx ring with packet context
> + *
> + * In regular XDP, xdp_buff is placed inside the ring structure,
> + * just before the packet context, so the latter can be accessed
> + * with xdp_buff address only at all times, but in ZC mode,

s/only// ?

> + * xdp_buffs come from the pool, so we need to reinitialize
> + * context for every packet.
> + *
> + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> + * right after xdp_buff, for our private use.
> + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> + */
> +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> +				   union ice_32b_rx_flex_desc *eop_desc,
> +				   struct ice_rx_ring *rx_ring)
> +{
> +	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> +	((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
> +	ice_xdp_meta_set_desc(xdp, eop_desc);
> +}
> +
>  /**
>   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
>   * @rx_ring: Rx ring
>   * @xdp: xdp_buff used as input to the XDP program
>   * @xdp_prog: XDP program to run
>   * @xdp_ring: ring to be used for XDP_TX action
> + * @rx_desc: packet descriptor
>   *
>   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
>   */
>  static int
>  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> -	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> +	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> +	       union ice_32b_rx_flex_desc *rx_desc)
>  {
>  	int err, result = ICE_XDP_PASS;
>  	u32 act;
>  
> +	ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
>  	act = bpf_prog_run_xdp(xdp_prog, xdp);
>  
>  	if (likely(act == XDP_REDIRECT)) {
> @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
>  		if (ice_is_non_eop(rx_ring, rx_desc))
>  			continue;
>  
> -		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> +		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> +					 rx_desc);
>  		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
>  			xdp_xmit |= xdp_res;
>  		} else if (xdp_res == ICE_XDP_EXIT) {
> -- 
> 2.41.0
>
Larysa Zaremba Oct. 23, 2023, 9:37 a.m. UTC | #7
On Fri, Oct 20, 2023 at 05:29:38PM +0200, Maciej Fijalkowski wrote:
> On Thu, Oct 12, 2023 at 07:05:13PM +0200, Larysa Zaremba wrote:
> > In AF_XDP ZC, xdp_buff is not stored on ring,
> > instead it is provided by xsk_buff_pool.
> > Space for metadata sources right after such buffers was already reserved
> > in commit 94ecc5ca4dbf ("xsk: Add cb area to struct xdp_buff_xsk").
> > This makes the implementation rather straightforward.
> > 
> > Update AF_XDP ZC packet processing to support XDP hints.
> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_xsk.c | 34 ++++++++++++++++++++++--
> >  1 file changed, 32 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > index ef778b8e6d1b..6ca620b2fbdd 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > @@ -752,22 +752,51 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
> >  	return ICE_XDP_CONSUMED;
> >  }
> >  
> > +/**
> > + * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
> > + * @xdp: xdp_buff used as input to the XDP program
> > + * @eop_desc: End of packet descriptor
> > + * @rx_ring: Rx ring with packet context
> > + *
> > + * In regular XDP, xdp_buff is placed inside the ring structure,
> > + * just before the packet context, so the latter can be accessed
> > + * with xdp_buff address only at all times, but in ZC mode,
> 
> s/only// ?
>

Yes :D
 
> > + * xdp_buffs come from the pool, so we need to reinitialize
> > + * context for every packet.
> > + *
> > + * We can safely convert xdp_buff_xsk to ice_xdp_buff,
> > + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
> > + * right after xdp_buff, for our private use.
> > + * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
> > + */
> > +static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
> > +				   union ice_32b_rx_flex_desc *eop_desc,
> > +				   struct ice_rx_ring *rx_ring)
> > +{
> > +	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
> > +	((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
> > +	ice_xdp_meta_set_desc(xdp, eop_desc);
> > +}
> > +
> >  /**
> >   * ice_run_xdp_zc - Executes an XDP program in zero-copy path
> >   * @rx_ring: Rx ring
> >   * @xdp: xdp_buff used as input to the XDP program
> >   * @xdp_prog: XDP program to run
> >   * @xdp_ring: ring to be used for XDP_TX action
> > + * @rx_desc: packet descriptor
> >   *
> >   * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> >   */
> >  static int
> >  ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> > -	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
> > +	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
> > +	       union ice_32b_rx_flex_desc *rx_desc)
> >  {
> >  	int err, result = ICE_XDP_PASS;
> >  	u32 act;
> >  
> > +	ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
> >  	act = bpf_prog_run_xdp(xdp_prog, xdp);
> >  
> >  	if (likely(act == XDP_REDIRECT)) {
> > @@ -907,7 +936,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> >  		if (ice_is_non_eop(rx_ring, rx_desc))
> >  			continue;
> >  
> > -		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
> > +		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
> > +					 rx_desc);
> >  		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
> >  			xdp_xmit |= xdp_res;
> >  		} else if (xdp_res == ICE_XDP_EXIT) {
> > -- 
> > 2.41.0
> >

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index ef778b8e6d1b..6ca620b2fbdd 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -752,22 +752,51 @@  static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
 	return ICE_XDP_CONSUMED;
 }
 
+/**
+ * ice_prepare_pkt_ctx_zc - Prepare packet context for XDP hints
+ * @xdp: xdp_buff used as input to the XDP program
+ * @eop_desc: End of packet descriptor
+ * @rx_ring: Rx ring with packet context
+ *
+ * In regular XDP, xdp_buff is placed inside the ring structure,
+ * just before the packet context, so the latter can be accessed
+ * with xdp_buff address only at all times, but in ZC mode,
+ * xdp_buffs come from the pool, so we need to reinitialize
+ * context for every packet.
+ *
+ * We can safely convert xdp_buff_xsk to ice_xdp_buff,
+ * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk
+ * right after xdp_buff, for our private use.
+ * XSK_CHECK_PRIV_TYPE() ensures we do not go above the limit.
+ */
+static void ice_prepare_pkt_ctx_zc(struct xdp_buff *xdp,
+				   union ice_32b_rx_flex_desc *eop_desc,
+				   struct ice_rx_ring *rx_ring)
+{
+	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
+	((struct ice_xdp_buff *)xdp)->pkt_ctx = rx_ring->pkt_ctx;
+	ice_xdp_meta_set_desc(xdp, eop_desc);
+}
+
 /**
  * ice_run_xdp_zc - Executes an XDP program in zero-copy path
  * @rx_ring: Rx ring
  * @xdp: xdp_buff used as input to the XDP program
  * @xdp_prog: XDP program to run
  * @xdp_ring: ring to be used for XDP_TX action
+ * @rx_desc: packet descriptor
  *
  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
  */
 static int
 ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
-	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+	       union ice_32b_rx_flex_desc *rx_desc)
 {
 	int err, result = ICE_XDP_PASS;
 	u32 act;
 
+	ice_prepare_pkt_ctx_zc(xdp, rx_desc, rx_ring);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
 	if (likely(act == XDP_REDIRECT)) {
@@ -907,7 +936,8 @@  int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 		if (ice_is_non_eop(rx_ring, rx_desc))
 			continue;
 
-		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
+		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
+					 rx_desc);
 		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
 			xdp_xmit |= xdp_res;
 		} else if (xdp_res == ICE_XDP_EXIT) {