diff mbox series

net: linearizing skb when downgrade gso_size

Message ID 20240708143128.49949-1-dracodingfly@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series net: linearizing skb when downgrade gso_size | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 869 this patch: 869
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: almasrymina@google.com eddyz87@gmail.com
netdev/build_clang success Errors and warnings before: 933 this patch: 933
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5902 this patch: 5902
netdev/checkpatch fail ERROR: code indent should use tabs where possible WARNING: Missing a blank line after declarations WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 119 this patch: 119
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release

Commit Message

Fred Li July 8, 2024, 2:31 p.m. UTC
Here is a patch that linearizes the skb when gso_size is
downgraded and SG should be disabled. If there are no issues,
I will submit a formal patch shortly.

Signed-off-by: Fred Li <dracodingfly@gmail.com>
---
 include/linux/skbuff.h | 22 ++++++++++++++++++++++
 net/core/filter.c      | 16 ++++++++++++----
 net/core/skbuff.c      | 19 ++-----------------
 3 files changed, 36 insertions(+), 21 deletions(-)

Comments

Willem de Bruijn July 9, 2024, 3:53 p.m. UTC | #1
Fred Li wrote:
> Here is a patch that linearizing skb when downgrade
> gso_size and sg should disabled, If there are no issues,
> I will submit a formal patch shortly.

Target bpf.

Probably does not need quite as many direct CCs. 
 
> Signed-off-by: Fred Li <dracodingfly@gmail.com>
> ---
>  include/linux/skbuff.h | 22 ++++++++++++++++++++++
>  net/core/filter.c      | 16 ++++++++++++----
>  net/core/skbuff.c      | 19 ++-----------------
>  3 files changed, 36 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 5f11f9873341..99b7fc1e826a 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -2400,6 +2400,28 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
>  	return skb->len - skb->data_len;
>  }
>  
> +static inline bool skb_is_nonsg(const struct sk_buff *skb)
> +{

is_nonsg does not cover the functionality, which is fairly subtle.
But maybe we don't need this function at all, see below..

> +	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> +	struct sk_buff *check_skb;

No need for separate check_skb

> +	for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
> +		if (skb_headlen(check_skb) && !check_skb->head_frag) {
> +			/* gso_size is untrusted, and we have a frag_list with
> +                         * a linear non head_frag item.
> +                         *
> +                         * If head_skb's headlen does not fit requested gso_size,
> +                         * it means that the frag_list members do NOT terminate
> +                         * on exact gso_size boundaries. Hence we cannot perform
> +                         * skb_frag_t page sharing. Therefore we must fallback to
> +                         * copying the frag_list skbs; we do so by disabling SG.
> +                         */
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> +
>  static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
>  {
>  	unsigned int i, len = 0;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index df4578219e82..c0e6e7f28635 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3525,13 +3525,21 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  	if (skb_is_gso(skb)) {
>  		struct skb_shared_info *shinfo = skb_shinfo(skb);
>  
> -		/* Due to header grow, MSS needs to be downgraded. */
> -		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> -			skb_decrease_gso_size(shinfo, len_diff);
> -
>  		/* Header must be checked, and gso_segs recomputed. */
>  		shinfo->gso_type |= gso_type;
>  		shinfo->gso_segs = 0;
> +
> +		/* Due to header grow, MSS needs to be downgraded.
> +		 * There is BUG_ON When segment the frag_list with
> +		 * head_frag true so linearize skb after downgrade
> +		 * the MSS.
> +		 */
> +		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
> +			skb_decrease_gso_size(shinfo, len_diff);
> +			if (skb_is_nonsg(skb))
> +				return skb_linearize(skb) ? : 0;
> +		}
> +

No need for ternary statement.

Instead of the complex test in skb_is_nonsg, can we just assume that
alignment will be off if having frag_list and changing gso_size.

The same will apply to bpf_skb_net_shrink too.

Not sure that it is okay to linearize inside a BPF helper function.
Hopefully bpf experts can chime in on that.

>  	}
>  
>  	return 0;
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index b1dab1b071fc..81e018185527 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4458,23 +4458,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>  
>  	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
>  	    mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
> -		struct sk_buff *check_skb;
> -
> -		for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
> -			if (skb_headlen(check_skb) && !check_skb->head_frag) {
> -				/* gso_size is untrusted, and we have a frag_list with
> -				 * a linear non head_frag item.
> -				 *
> -				 * If head_skb's headlen does not fit requested gso_size,
> -				 * it means that the frag_list members do NOT terminate
> -				 * on exact gso_size boundaries. Hence we cannot perform
> -				 * skb_frag_t page sharing. Therefore we must fallback to
> -				 * copying the frag_list skbs; we do so by disabling SG.
> -				 */
> -				features &= ~NETIF_F_SG;
> -				break;
> -			}
> -		}
> +		if (skb_is_nonsg(head_skb))
> +			features &= ~NETIF_F_SG;
>  	}
>  
>  	__skb_push(head_skb, doffset);
> -- 
> 2.33.0
>
Herbert Xu July 9, 2024, 8:16 p.m. UTC | #2
On Tue, Jul 09, 2024 at 11:53:21AM -0400, Willem de Bruijn wrote:
>
> > +		/* Due to header grow, MSS needs to be downgraded.
> > +		 * There is BUG_ON When segment the frag_list with
> > +		 * head_frag true so linearize skb after downgrade
> > +		 * the MSS.
> > +		 */

This sounds completely wrong.  You should never grow the TCP header
by changing gso_size.  What is the usage-scenario for this?

Think about it, if a router forwards a TCP packet, and ends up
growing its TCP header and then splits the packet into two, then
this router is brain-dead.

Cheers,
Willem de Bruijn July 9, 2024, 9:29 p.m. UTC | #3
Herbert Xu wrote:
> On Tue, Jul 09, 2024 at 11:53:21AM -0400, Willem de Bruijn wrote:
> >
> > > +		/* Due to header grow, MSS needs to be downgraded.
> > > +		 * There is BUG_ON When segment the frag_list with
> > > +		 * head_frag true so linearize skb after downgrade
> > > +		 * the MSS.
> > > +		 */
> 
> This sounds completely wrong.  You should never grow the TCP header
> by changing gso_size.  What is the usage-scenario for this?
> 
> Think about it, if a router forwards a TCP packet, and ends up
> growing its TCP header and then splits the packet into two, then
> this router is brain-dead.

This is an unfortunate feature, but already exists.

It decreases gso_size to account for tunnel headers.

For USO, we added BPF_F_ADJ_ROOM_FIXED_GSO to avoid this in better,
newer users.
Herbert Xu July 10, 2024, 11:06 p.m. UTC | #4
On Tue, Jul 09, 2024 at 05:29:59PM -0400, Willem de Bruijn wrote:
>
> This is an unfortunate feature, but already exists.
> 
> It decreases gso_size to account for tunnel headers.

Growing the tunnel header is totally fine.  But you should not
decrease gso_size because of that.  Instead the correct course
of action is to drop the packet and generate an ICMP if it no
longer fits the MTU.

A router that resegments a TCP packet at the TCP-level (not IP)
is brain-dead.

Cheers,
Fred Li July 12, 2024, 8:17 a.m. UTC | #5
> No need for ternary statement.
> 
> Instead of the complex test in skb_is_nonsg, can we just assume that
> alignment will be off if having frag_list and changing gso_size.
> 
> The same will apply to bpf_skb_net_shrink too.

Increasing gso_size may not be a problem, and we can use
BPF_F_ADJ_ROOM_FIXED_GSO to avoid updating gso_size when shrinking.

> 
> Not sure that it is okay to linearize inside a BPF helper function.
> Hopefully bpf experts can chime in on that.

Thanks

Fred Li
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5f11f9873341..99b7fc1e826a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2400,6 +2400,28 @@  static inline unsigned int skb_headlen(const struct sk_buff *skb)
 	return skb->len - skb->data_len;
 }
 
+static inline bool skb_is_nonsg(const struct sk_buff *skb)
+{
+	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+	struct sk_buff *check_skb;
+	for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
+		if (skb_headlen(check_skb) && !check_skb->head_frag) {
+			/* gso_size is untrusted, and we have a frag_list with
+                         * a linear non head_frag item.
+                         *
+                         * If head_skb's headlen does not fit requested gso_size,
+                         * it means that the frag_list members do NOT terminate
+                         * on exact gso_size boundaries. Hence we cannot perform
+                         * skb_frag_t page sharing. Therefore we must fallback to
+                         * copying the frag_list skbs; we do so by disabling SG.
+                         */
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
 {
 	unsigned int i, len = 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index df4578219e82..c0e6e7f28635 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3525,13 +3525,21 @@  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-		/* Due to header grow, MSS needs to be downgraded. */
-		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
-			skb_decrease_gso_size(shinfo, len_diff);
-
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= gso_type;
 		shinfo->gso_segs = 0;
+
+		/* Due to header growth, MSS needs to be downgraded.
+		 * There is a BUG_ON() when segmenting the frag_list with
+		 * head_frag true, so linearize the skb after downgrading
+		 * the MSS.
+		 */
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
+			skb_decrease_gso_size(shinfo, len_diff);
+			if (skb_is_nonsg(skb))
+				return skb_linearize(skb) ? : 0;
+		}
+
 	}
 
 	return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1dab1b071fc..81e018185527 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4458,23 +4458,8 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
 	    mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
-		struct sk_buff *check_skb;
-
-		for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
-			if (skb_headlen(check_skb) && !check_skb->head_frag) {
-				/* gso_size is untrusted, and we have a frag_list with
-				 * a linear non head_frag item.
-				 *
-				 * If head_skb's headlen does not fit requested gso_size,
-				 * it means that the frag_list members do NOT terminate
-				 * on exact gso_size boundaries. Hence we cannot perform
-				 * skb_frag_t page sharing. Therefore we must fallback to
-				 * copying the frag_list skbs; we do so by disabling SG.
-				 */
-				features &= ~NETIF_F_SG;
-				break;
-			}
-		}
+		if (skb_is_nonsg(head_skb))
+			features &= ~NETIF_F_SG;
 	}
 
 	__skb_push(head_skb, doffset);