diff mbox series

[v5,bpf,04/11] ice: work on pre-XDP prog frag count

Message ID 20240122221610.556746-5-maciej.fijalkowski@intel.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series net: bpf_xdp_adjust_tail() and Intel mbuf fixes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1077 this patch: 1077
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1095 this patch: 1095
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1095 this patch: 1095
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 103 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Maciej Fijalkowski Jan. 22, 2024, 10:16 p.m. UTC
Fix an OOM panic in XDP_DRV mode when a XDP program shrinks a
multi-buffer packet by 4k bytes and then redirects it to an AF_XDP
socket.

Since support for handling multi-buffer frames was added to XDP, usage
of bpf_xdp_adjust_tail() helper within XDP program can free the page
that given fragment occupies and in turn decrease the fragment count
within skb_shared_info that is embedded in xdp_buff struct. In current
ice driver codebase, it can become problematic when page recycling logic
decides not to reuse the page. In such case, __page_frag_cache_drain()
is used with ice_rx_buf::pagecnt_bias that was not adjusted after
refcount of page was changed by XDP prog which in turn does not drain
the refcount to 0 and page is never freed.

To address this, let us store the count of frags before the XDP program
was executed on Rx ring struct. This will be used to compare with
current frag count from skb_shared_info embedded in xdp_buff. A smaller
value in the latter indicates that XDP prog freed frag(s). Then, for
given delta decrement pagecnt_bias for XDP_DROP verdict.

While at it, let us also handle the EOP frag within
ice_set_rx_bufs_act() to make our life easier, so all of the adjustments
needed to be applied against freed frags are performed in the single
place.

Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c     | 14 ++++++---
 drivers/net/ethernet/intel/ice/ice_txrx.h     |  1 +
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 31 +++++++++++++------
 3 files changed, 32 insertions(+), 14 deletions(-)

Comments

Magnus Karlsson Jan. 24, 2024, 8:37 a.m. UTC | #1
On Mon, 22 Jan 2024 at 23:16, Maciej Fijalkowski
<maciej.fijalkowski@intel.com> wrote:
>
> Fix an OOM panic in XDP_DRV mode when a XDP program shrinks a
> multi-buffer packet by 4k bytes and then redirects it to an AF_XDP
> socket.
>
> Since support for handling multi-buffer frames was added to XDP, usage
> of bpf_xdp_adjust_tail() helper within XDP program can free the page
> that given fragment occupies and in turn decrease the fragment count
> within skb_shared_info that is embedded in xdp_buff struct. In current
> ice driver codebase, it can become problematic when page recycling logic
> decides not to reuse the page. In such case, __page_frag_cache_drain()
> is used with ice_rx_buf::pagecnt_bias that was not adjusted after
> refcount of page was changed by XDP prog which in turn does not drain
> the refcount to 0 and page is never freed.
>
> To address this, let us store the count of frags before the XDP program
> was executed on Rx ring struct. This will be used to compare with
> current frag count from skb_shared_info embedded in xdp_buff. A smaller
> value in the latter indicates that XDP prog freed frag(s). Then, for
> given delta decrement pagecnt_bias for XDP_DROP verdict.
>
> While at it, let us also handle the EOP frag within
> ice_set_rx_bufs_act() to make our life easier, so all of the adjustments
> needed to be applied against freed frags are performed in the single
> place.
>
> Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_txrx.c     | 14 ++++++---
>  drivers/net/ethernet/intel/ice/ice_txrx.h     |  1 +
>  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 31 +++++++++++++------
>  3 files changed, 32 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> index 59617f055e35..1760e81379cc 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> @@ -603,9 +603,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
>                 ret = ICE_XDP_CONSUMED;
>         }
>  exit:
> -       rx_buf->act = ret;
> -       if (unlikely(xdp_buff_has_frags(xdp)))
> -               ice_set_rx_bufs_act(xdp, rx_ring, ret);
> +       ice_set_rx_bufs_act(xdp, rx_ring, ret);
>  }
>
>  /**
> @@ -893,14 +891,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
>         }
>
>         if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
> -               if (unlikely(xdp_buff_has_frags(xdp)))
> -                       ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
> +               ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
>                 return -ENOMEM;
>         }
>
>         __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
>                                    rx_buf->page_offset, size);
>         sinfo->xdp_frags_size += size;
> +       /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
> +        * can pop off frags but driver has to handle it on its own
> +        */
> +       rx_ring->nr_frags = sinfo->nr_frags;
>
>         if (page_is_pfmemalloc(rx_buf->page))
>                 xdp_buff_set_frag_pfmemalloc(xdp);
> @@ -1251,6 +1252,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
>
>                 xdp->data = NULL;
>                 rx_ring->first_desc = ntc;
> +               rx_ring->nr_frags = 0;
>                 continue;
>  construct_skb:
>                 if (likely(ice_ring_uses_build_skb(rx_ring)))
> @@ -1266,10 +1268,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
>                                                     ICE_XDP_CONSUMED);
>                         xdp->data = NULL;
>                         rx_ring->first_desc = ntc;
> +                       rx_ring->nr_frags = 0;
>                         break;
>                 }
>                 xdp->data = NULL;
>                 rx_ring->first_desc = ntc;
> +               rx_ring->nr_frags = 0;

Are these needed? Or asked in another way, is there some way in which
ice_set_rx_bufs_act() can be executed before ice_add_xdp_frag()? If
not, we could remove them.

Looks good otherwise.

Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>

>
>                 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
>                 if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index b3379ff73674..af955b0e5dc5 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -358,6 +358,7 @@ struct ice_rx_ring {
>         struct ice_tx_ring *xdp_ring;
>         struct ice_rx_ring *next;       /* pointer to next ring in q_vector */
>         struct xsk_buff_pool *xsk_pool;
> +       u32 nr_frags;
>         dma_addr_t dma;                 /* physical address of ring */
>         u16 rx_buf_len;
>         u8 dcb_tc;                      /* Traffic class of ring */
> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> index 762047508619..afcead4baef4 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> @@ -12,26 +12,39 @@
>   * act: action to store onto Rx buffers related to XDP buffer parts
>   *
>   * Set action that should be taken before putting Rx buffer from first frag
> - * to one before last. Last one is handled by caller of this function as it
> - * is the EOP frag that is currently being processed. This function is
> - * supposed to be called only when XDP buffer contains frags.
> + * to the last.
>   */
>  static inline void
>  ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
>                     const unsigned int act)
>  {
> -       const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
> -       u32 first = rx_ring->first_desc;
> -       u32 nr_frags = sinfo->nr_frags;
> +       u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
> +       u32 nr_frags = rx_ring->nr_frags + 1;
> +       u32 idx = rx_ring->first_desc;
>         u32 cnt = rx_ring->count;
>         struct ice_rx_buf *buf;
>
>         for (int i = 0; i < nr_frags; i++) {
> -               buf = &rx_ring->rx_buf[first];
> +               buf = &rx_ring->rx_buf[idx];
>                 buf->act = act;
>
> -               if (++first == cnt)
> -                       first = 0;
> +               if (++idx == cnt)
> +                       idx = 0;
> +       }
> +
> +       /* adjust pagecnt_bias on frags freed by XDP prog */
> +       if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
> +               u32 delta = rx_ring->nr_frags - sinfo_frags;
> +
> +               while (delta) {
> +                       if (idx == 0)
> +                               idx = cnt - 1;
> +                       else
> +                               idx--;
> +                       buf = &rx_ring->rx_buf[idx];
> +                       buf->pagecnt_bias--;
> +                       delta--;
> +               }
>         }
>  }
>
> --
> 2.34.1
>
>
Maciej Fijalkowski Jan. 24, 2024, 2:05 p.m. UTC | #2
On Wed, Jan 24, 2024 at 09:37:13AM +0100, Magnus Karlsson wrote:
> On Mon, 22 Jan 2024 at 23:16, Maciej Fijalkowski
> <maciej.fijalkowski@intel.com> wrote:
> >
> > Fix an OOM panic in XDP_DRV mode when a XDP program shrinks a
> > multi-buffer packet by 4k bytes and then redirects it to an AF_XDP
> > socket.
> >
> > Since support for handling multi-buffer frames was added to XDP, usage
> > of bpf_xdp_adjust_tail() helper within XDP program can free the page
> > that given fragment occupies and in turn decrease the fragment count
> > within skb_shared_info that is embedded in xdp_buff struct. In current
> > ice driver codebase, it can become problematic when page recycling logic
> > decides not to reuse the page. In such case, __page_frag_cache_drain()
> > is used with ice_rx_buf::pagecnt_bias that was not adjusted after
> > refcount of page was changed by XDP prog which in turn does not drain
> > the refcount to 0 and page is never freed.
> >
> > To address this, let us store the count of frags before the XDP program
> > was executed on Rx ring struct. This will be used to compare with
> > current frag count from skb_shared_info embedded in xdp_buff. A smaller
> > value in the latter indicates that XDP prog freed frag(s). Then, for
> > given delta decrement pagecnt_bias for XDP_DROP verdict.
> >
> > While at it, let us also handle the EOP frag within
> > ice_set_rx_bufs_act() to make our life easier, so all of the adjustments
> > needed to be applied against freed frags are performed in the single
> > place.
> >
> > Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side")
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_txrx.c     | 14 ++++++---
> >  drivers/net/ethernet/intel/ice/ice_txrx.h     |  1 +
> >  drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 31 +++++++++++++------
> >  3 files changed, 32 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > index 59617f055e35..1760e81379cc 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > @@ -603,9 +603,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> >                 ret = ICE_XDP_CONSUMED;
> >         }
> >  exit:
> > -       rx_buf->act = ret;
> > -       if (unlikely(xdp_buff_has_frags(xdp)))
> > -               ice_set_rx_bufs_act(xdp, rx_ring, ret);
> > +       ice_set_rx_bufs_act(xdp, rx_ring, ret);
> >  }
> >
> >  /**
> > @@ -893,14 +891,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
> >         }
> >
> >         if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
> > -               if (unlikely(xdp_buff_has_frags(xdp)))
> > -                       ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
> > +               ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
> >                 return -ENOMEM;
> >         }
> >
> >         __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
> >                                    rx_buf->page_offset, size);
> >         sinfo->xdp_frags_size += size;
> > +       /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
> > +        * can pop off frags but driver has to handle it on its own
> > +        */
> > +       rx_ring->nr_frags = sinfo->nr_frags;
> >
> >         if (page_is_pfmemalloc(rx_buf->page))
> >                 xdp_buff_set_frag_pfmemalloc(xdp);
> > @@ -1251,6 +1252,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
> >
> >                 xdp->data = NULL;
> >                 rx_ring->first_desc = ntc;
> > +               rx_ring->nr_frags = 0;
> >                 continue;
> >  construct_skb:
> >                 if (likely(ice_ring_uses_build_skb(rx_ring)))
> > @@ -1266,10 +1268,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
> >                                                     ICE_XDP_CONSUMED);
> >                         xdp->data = NULL;
> >                         rx_ring->first_desc = ntc;
> > +                       rx_ring->nr_frags = 0;
> >                         break;
> >                 }
> >                 xdp->data = NULL;
> >                 rx_ring->first_desc = ntc;
> > +               rx_ring->nr_frags = 0;
> 
> Are these needed? Or asked in another way, is there some way in which
> ice_set_rx_bufs_act() can be executed before ice_add_xdp_frag()? If
> not, we could remove them.

I am afraid that if you would have fragged packet followed by non-fragged
one then ice_set_rx_bufs_act() would incorrectly go over more buffers
than it was supposed to. I think we should keep those, unless I am missing
something?

> 
> Looks good otherwise.
> 
> Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
> 
> >
> >                 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
> >                 if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > index b3379ff73674..af955b0e5dc5 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> > @@ -358,6 +358,7 @@ struct ice_rx_ring {
> >         struct ice_tx_ring *xdp_ring;
> >         struct ice_rx_ring *next;       /* pointer to next ring in q_vector */
> >         struct xsk_buff_pool *xsk_pool;
> > +       u32 nr_frags;
> >         dma_addr_t dma;                 /* physical address of ring */
> >         u16 rx_buf_len;
> >         u8 dcb_tc;                      /* Traffic class of ring */
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > index 762047508619..afcead4baef4 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
> > @@ -12,26 +12,39 @@
> >   * act: action to store onto Rx buffers related to XDP buffer parts
> >   *
> >   * Set action that should be taken before putting Rx buffer from first frag
> > - * to one before last. Last one is handled by caller of this function as it
> > - * is the EOP frag that is currently being processed. This function is
> > - * supposed to be called only when XDP buffer contains frags.
> > + * to the last.
> >   */
> >  static inline void
> >  ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
> >                     const unsigned int act)
> >  {
> > -       const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
> > -       u32 first = rx_ring->first_desc;
> > -       u32 nr_frags = sinfo->nr_frags;
> > +       u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
> > +       u32 nr_frags = rx_ring->nr_frags + 1;
> > +       u32 idx = rx_ring->first_desc;
> >         u32 cnt = rx_ring->count;
> >         struct ice_rx_buf *buf;
> >
> >         for (int i = 0; i < nr_frags; i++) {
> > -               buf = &rx_ring->rx_buf[first];
> > +               buf = &rx_ring->rx_buf[idx];
> >                 buf->act = act;
> >
> > -               if (++first == cnt)
> > -                       first = 0;
> > +               if (++idx == cnt)
> > +                       idx = 0;
> > +       }
> > +
> > +       /* adjust pagecnt_bias on frags freed by XDP prog */
> > +       if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
> > +               u32 delta = rx_ring->nr_frags - sinfo_frags;
> > +
> > +               while (delta) {
> > +                       if (idx == 0)
> > +                               idx = cnt - 1;
> > +                       else
> > +                               idx--;
> > +                       buf = &rx_ring->rx_buf[idx];
> > +                       buf->pagecnt_bias--;
> > +                       delta--;
> > +               }
> >         }
> >  }
> >
> > --
> > 2.34.1
> >
> >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 59617f055e35..1760e81379cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -603,9 +603,7 @@  ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 		ret = ICE_XDP_CONSUMED;
 	}
 exit:
-	rx_buf->act = ret;
-	if (unlikely(xdp_buff_has_frags(xdp)))
-		ice_set_rx_bufs_act(xdp, rx_ring, ret);
+	ice_set_rx_bufs_act(xdp, rx_ring, ret);
 }
 
 /**
@@ -893,14 +891,17 @@  ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	}
 
 	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
-		if (unlikely(xdp_buff_has_frags(xdp)))
-			ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
 		return -ENOMEM;
 	}
 
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
 				   rx_buf->page_offset, size);
 	sinfo->xdp_frags_size += size;
+	/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
+	 * can pop off frags but driver has to handle it on its own
+	 */
+	rx_ring->nr_frags = sinfo->nr_frags;
 
 	if (page_is_pfmemalloc(rx_buf->page))
 		xdp_buff_set_frag_pfmemalloc(xdp);
@@ -1251,6 +1252,7 @@  int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 
 		xdp->data = NULL;
 		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
 		continue;
 construct_skb:
 		if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1266,10 +1268,12 @@  int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 						    ICE_XDP_CONSUMED);
 			xdp->data = NULL;
 			rx_ring->first_desc = ntc;
+			rx_ring->nr_frags = 0;
 			break;
 		}
 		xdp->data = NULL;
 		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
 
 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
 		if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b3379ff73674..af955b0e5dc5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -358,6 +358,7 @@  struct ice_rx_ring {
 	struct ice_tx_ring *xdp_ring;
 	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
 	struct xsk_buff_pool *xsk_pool;
+	u32 nr_frags;
 	dma_addr_t dma;			/* physical address of ring */
 	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 762047508619..afcead4baef4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -12,26 +12,39 @@ 
  * act: action to store onto Rx buffers related to XDP buffer parts
  *
  * Set action that should be taken before putting Rx buffer from first frag
- * to one before last. Last one is handled by caller of this function as it
- * is the EOP frag that is currently being processed. This function is
- * supposed to be called only when XDP buffer contains frags.
+ * to the last.
  */
 static inline void
 ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
 		    const unsigned int act)
 {
-	const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-	u32 first = rx_ring->first_desc;
-	u32 nr_frags = sinfo->nr_frags;
+	u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 nr_frags = rx_ring->nr_frags + 1;
+	u32 idx = rx_ring->first_desc;
 	u32 cnt = rx_ring->count;
 	struct ice_rx_buf *buf;
 
 	for (int i = 0; i < nr_frags; i++) {
-		buf = &rx_ring->rx_buf[first];
+		buf = &rx_ring->rx_buf[idx];
 		buf->act = act;
 
-		if (++first == cnt)
-			first = 0;
+		if (++idx == cnt)
+			idx = 0;
+	}
+
+	/* adjust pagecnt_bias on frags freed by XDP prog */
+	if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+		u32 delta = rx_ring->nr_frags - sinfo_frags;
+
+		while (delta) {
+			if (idx == 0)
+				idx = cnt - 1;
+			else
+				idx--;
+			buf = &rx_ring->rx_buf[idx];
+			buf->pagecnt_bias--;
+			delta--;
+		}
 	}
 }