[v4,bpf,05/11] i40e: handle multi-buffer packets that are shrunk by xdp prog

Message ID 20240119233037.537084-6-maciej.fijalkowski@intel.com (mailing list archive)
State Superseded
Delegated to: BPF
Series net: bpf_xdp_adjust_tail() and Intel mbuf fixes

Checks

Context Check Description
bpf/vmtest-bpf-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1077 this patch: 1077
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1095 this patch: 1095
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1095 this patch: 1095
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 124 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Maciej Fijalkowski Jan. 19, 2024, 11:30 p.m. UTC
From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

XDP programs can shrink packets by calling the bpf_xdp_adjust_tail()
helper function. For multi-buffer packets this may reduce the frag
count stored in the skb_shared_info area of the xdp_buff struct, which
breaks the current handling of the XDP_PASS and XDP_DROP cases.
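
As an illustration, a minimal frags-aware XDP program that exercises
this path could look as follows; the section name, program name and the
128-byte trim are illustrative and not taken from this series:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* "xdp.frags" marks the program as multi-buffer capable
	 * (libbpf sets BPF_F_XDP_HAS_FRAGS at load time).
	 */
	SEC("xdp.frags")
	int shrink_tail(struct xdp_md *ctx)
	{
		/* A negative delta trims the tail; frags emptied by the
		 * trim are freed inside the helper, which is what lowers
		 * the frag count seen later by the driver.
		 */
		if (bpf_xdp_adjust_tail(ctx, -128))
			return XDP_DROP;
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";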

For XDP_PASS, the skb is currently built using the frag count of the
xdp_buff from before it was processed by the XDP prog, which results in
an inconsistent skb when the frag count is reduced by the prog. To fix
this, get the correct frag count while building the skb instead of
using the pre-obtained frag count.
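
In the fixed flow the post-run count is what feeds the skb. Condensed
from the i40e_build_skb() hunk in the patch below, where sinfo and
nr_frags are read from the xdp_buff only after the prog has run (the
final pfmemalloc argument is completed from the surrounding driver
code, not visible in the hunk):

	if (unlikely(xdp_buff_has_frags(xdp))) {
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   nr_frags * xdp->frame_sz,
					   xdp_buff_is_frag_pfmemalloc(xdp));
	}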

For XDP_DROP, the current page recycling logic will not reuse the page
but will instead adjust pagecnt_bias so that the page can be freed.
This again results in inconsistent behavior, as the page count has
already been changed by the helper while freeing the frag(s) as part of
shrinking the packet. To fix this, only adjust pagecnt_bias for buffers
that are still part of the packet after the XDP prog run.
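
Condensed from the i40e_process_rx_buffs() hunk below, the XDP_DROP
side walks the packet's buffers and returns a page reference (via
pagecnt_bias) only for the head buffer plus the frags that survived the
prog; the helper already released the references of the trimmed frags:

	/* i counts buffers walked from next_to_clean, nr_frags is the
	 * post-prog frag count, so i <= nr_frags covers the head buffer
	 * plus the remaining frags.
	 */
	if (xdp_res != I40E_XDP_CONSUMED)
		i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
	else if (i++ <= nr_frags)
		rx_buffer->pagecnt_bias++;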

Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx")
Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 42 ++++++++++++---------
 1 file changed, 24 insertions(+), 18 deletions(-)

Comments

Simon Horman Jan. 20, 2024, 11:35 a.m. UTC | #1
On Sat, Jan 20, 2024 at 12:30:31AM +0100, Maciej Fijalkowski wrote:
> From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> 
> XDP programs can shrink packets by calling the bpf_xdp_adjust_tail()
> helper function. For multi-buffer packets this may reduce the frag
> count stored in the skb_shared_info area of the xdp_buff struct, which
> breaks the current handling of the XDP_PASS and XDP_DROP cases.
> 
> For XDP_PASS, the skb is currently built using the frag count of the
> xdp_buff from before it was processed by the XDP prog, which results in
> an inconsistent skb when the frag count is reduced by the prog. To fix
> this, get the correct frag count while building the skb instead of
> using the pre-obtained frag count.
> 
> For XDP_DROP, the current page recycling logic will not reuse the page
> but will instead adjust pagecnt_bias so that the page can be freed.
> This again results in inconsistent behavior, as the page count has
> already been changed by the helper while freeing the frag(s) as part of
> shrinking the packet. To fix this, only adjust pagecnt_bias for buffers
> that are still part of the packet after the XDP prog run.
> 
> Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx")
> Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

...

> @@ -2129,20 +2130,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
>   * i40e_construct_skb - Allocate skb and populate it
>   * @rx_ring: rx descriptor ring to transact packets on
>   * @xdp: xdp_buff pointing to the data
> - * @nr_frags: number of buffers for the packet
>   *
>   * This function allocates an skb.  It then populates it with the page
>   * data from the current receive descriptor, taking care to set up the
>   * skb correctly.
>   */
>  static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
> -					  struct xdp_buff *xdp,
> -					  u32 nr_frags)
> +					  struct xdp_buff *xdp)
>  {
>  	unsigned int size = xdp->data_end - xdp->data;
>  	struct i40e_rx_buffer *rx_buffer;
> +	struct skb_shared_info *sinfo;
>  	unsigned int headlen;
>  	struct sk_buff *skb;
> +	u32 nr_frags;
>  
>  	/* prefetch first cache line of first page */
>  	net_prefetch(xdp->data);
> @@ -2180,6 +2181,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
>  	memcpy(__skb_put(skb, headlen), xdp->data,
>  	       ALIGN(headlen, sizeof(long)));
>  
> +	if (unlikely(xdp_buff_has_frags(xdp))) {
> +		sinfo = xdp_get_shared_info_from_buff(xdp);
> +		nr_frags = sinfo->nr_frags;
> +	}
>  	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
>  	/* update all of the pointers */
>  	size -= headlen;

Hi Maciej,

Above, nr_frags is initialised only if xdp_buff_has_frags(xdp) is true.
The code immediately following this hunk is:

	if (size) {
		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			...

Can it be the case that nr_frags is used uninitialised here?

Flagged by Smatch.
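
For reference, one plausible shape of the fix, assuming the resent
version simply zero-initializes the variable, keeps the single-buffer
path comparing 0 against MAX_SKB_FRAGS instead of reading uninitialized
stack memory:

	u32 nr_frags = 0;

	...

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}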

...
Maciej Fijalkowski Jan. 22, 2024, 2:08 p.m. UTC | #2
On Sat, Jan 20, 2024 at 11:35:41AM +0000, Simon Horman wrote:
> On Sat, Jan 20, 2024 at 12:30:31AM +0100, Maciej Fijalkowski wrote:
> > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > 
> > XDP programs can shrink packets by calling the bpf_xdp_adjust_tail()
> > helper function. For multi-buffer packets this may reduce the frag
> > count stored in the skb_shared_info area of the xdp_buff struct, which
> > breaks the current handling of the XDP_PASS and XDP_DROP cases.
> > 
> > For XDP_PASS, the skb is currently built using the frag count of the
> > xdp_buff from before it was processed by the XDP prog, which results in
> > an inconsistent skb when the frag count is reduced by the prog. To fix
> > this, get the correct frag count while building the skb instead of
> > using the pre-obtained frag count.
> > 
> > For XDP_DROP, the current page recycling logic will not reuse the page
> > but will instead adjust pagecnt_bias so that the page can be freed.
> > This again results in inconsistent behavior, as the page count has
> > already been changed by the helper while freeing the frag(s) as part of
> > shrinking the packet. To fix this, only adjust pagecnt_bias for buffers
> > that are still part of the packet after the XDP prog run.
> > 
> > Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx")
> > Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> 
> ...
> 
> > @@ -2129,20 +2130,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
> >   * i40e_construct_skb - Allocate skb and populate it
> >   * @rx_ring: rx descriptor ring to transact packets on
> >   * @xdp: xdp_buff pointing to the data
> > - * @nr_frags: number of buffers for the packet
> >   *
> >   * This function allocates an skb.  It then populates it with the page
> >   * data from the current receive descriptor, taking care to set up the
> >   * skb correctly.
> >   */
> >  static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
> > -					  struct xdp_buff *xdp,
> > -					  u32 nr_frags)
> > +					  struct xdp_buff *xdp)
> >  {
> >  	unsigned int size = xdp->data_end - xdp->data;
> >  	struct i40e_rx_buffer *rx_buffer;
> > +	struct skb_shared_info *sinfo;
> >  	unsigned int headlen;
> >  	struct sk_buff *skb;
> > +	u32 nr_frags;
> >  
> >  	/* prefetch first cache line of first page */
> >  	net_prefetch(xdp->data);
> > @@ -2180,6 +2181,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
> >  	memcpy(__skb_put(skb, headlen), xdp->data,
> >  	       ALIGN(headlen, sizeof(long)));
> >  
> > +	if (unlikely(xdp_buff_has_frags(xdp))) {
> > +		sinfo = xdp_get_shared_info_from_buff(xdp);
> > +		nr_frags = sinfo->nr_frags;
> > +	}
> >  	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
> >  	/* update all of the pointers */
> >  	size -= headlen;
> 
> Hi Maciej,
> 
> Above, nr_frags is initialised only if xdp_buff_has_frags(xdp) is true.
> The code immediately following this hunk is:
> 
> 	if (size) {
> 		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
> 			...
> 
> Can it be the case that nr_frags is used uninitialised here?
> 
> Flagged by Smatch.

Argh. Picked the old version of patch from Tirtha. Will resend with a
correct one.

> 
> ...

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b82df5bdfac0..b098ca2c11af 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2099,7 +2099,8 @@  static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
 static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 				  struct xdp_buff *xdp)
 {
-	u32 next = rx_ring->next_to_clean;
+	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 next = rx_ring->next_to_clean, i = 0;
 	struct i40e_rx_buffer *rx_buffer;
 
 	xdp->flags = 0;
@@ -2112,10 +2113,10 @@  static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 		if (!rx_buffer->page)
 			continue;
 
-		if (xdp_res == I40E_XDP_CONSUMED)
-			rx_buffer->pagecnt_bias++;
-		else
+		if (xdp_res != I40E_XDP_CONSUMED)
 			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+		else if (i++ <= nr_frags)
+			rx_buffer->pagecnt_bias++;
 
 		/* EOP buffer will be put in i40e_clean_rx_irq() */
 		if (next == rx_ring->next_to_process)
@@ -2129,20 +2130,20 @@  static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
  * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function allocates an skb.  It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
  * skb correctly.
  */
 static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
-					  struct xdp_buff *xdp,
-					  u32 nr_frags)
+					  struct xdp_buff *xdp)
 {
 	unsigned int size = xdp->data_end - xdp->data;
 	struct i40e_rx_buffer *rx_buffer;
+	struct skb_shared_info *sinfo;
 	unsigned int headlen;
 	struct sk_buff *skb;
+	u32 nr_frags;
 
 	/* prefetch first cache line of first page */
 	net_prefetch(xdp->data);
@@ -2180,6 +2181,10 @@  static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	memcpy(__skb_put(skb, headlen), xdp->data,
 	       ALIGN(headlen, sizeof(long)));
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
 	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 	/* update all of the pointers */
 	size -= headlen;
@@ -2199,9 +2204,8 @@  static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	}
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
 
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
 		       sizeof(skb_frag_t) * nr_frags);
 
@@ -2224,17 +2228,17 @@  static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
  * i40e_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function builds an skb around an existing Rx buffer, taking care
  * to set up the skb correctly and avoid any memcpy overhead.
  */
 static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
-				      struct xdp_buff *xdp,
-				      u32 nr_frags)
+				      struct xdp_buff *xdp)
 {
 	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo;
 	struct sk_buff *skb;
+	u32 nr_frags;
 
 	/* Prefetch first cache line of first page. If xdp->data_meta
 	 * is unused, this points exactly as xdp->data, otherwise we
@@ -2243,6 +2247,11 @@  static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 	 */
 	net_prefetch(xdp->data_meta);
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
 	/* build an skb around the page buffer */
 	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
 	if (unlikely(!skb))
@@ -2255,9 +2264,6 @@  static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 		skb_metadata_set(skb, metasize);
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo;
-
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		xdp_update_skb_shared_info(skb, nr_frags,
 					   sinfo->xdp_frags_size,
 					   nr_frags * xdp->frame_sz,
@@ -2546,7 +2552,7 @@  static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
 			/* Update ntc and bump cleaned count if not in the
 			 * middle of mb packet.
 			 */
-			if (rx_ring->next_to_clean == ntp) {
+			if (!xdp->data) {
 				rx_ring->next_to_clean =
 					rx_ring->next_to_process;
 				cleaned_count++;
@@ -2602,9 +2608,9 @@  static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
 			total_rx_bytes += size;
 		} else {
 			if (ring_uses_build_skb(rx_ring))
-				skb = i40e_build_skb(rx_ring, xdp, nfrags);
+				skb = i40e_build_skb(rx_ring, xdp);
 			else
-				skb = i40e_construct_skb(rx_ring, xdp, nfrags);
+				skb = i40e_construct_skb(rx_ring, xdp);
 
 			/* drop if we failed to retrieve a buffer */
 			if (!skb) {