[v2,net-next] gro: add support of (hw)gro packets to gro stack

Message ID 20220930220905.2019461-1-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Commit 5eddb24901ee49eee23c0bfce6af2e83fd5679bd
Delegated to: Netdev Maintainers
Series [v2,net-next] gro: add support of (hw)gro packets to gro stack

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/cc_maintainers warning 2 maintainers not CCed: dsahern@kernel.org yoshfuji@linux-ipv6.org
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6 this patch: 6
netdev/checkpatch warning WARNING: 'Co-developed-by:' is the preferred signature form; WARNING: line length of 83 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Sept. 30, 2022, 10:09 p.m. UTC
From: Coco Li <lixiaoyan@google.com>

The current GRO stack only supports incoming packets containing
a single frame/MSS.

This patch changes GRO to accept packets that are already GRO packets.

HW-GRO (aka RSC for some vendors) is very often limited in the presence
of interleaved packets. The Linux SW GRO stack can complete the job
and provide larger GRO packets, thus reducing the rate of ACK packets
and CPU overhead.

This also means BIG TCP can still be used, even if HW-GRO/RSC was
only able to cook ~64 KB GRO packets.

v2: fix logic in tcp_gro_receive()

    Only support TCP for the moment (Paolo)

Co-Developed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
---
 net/core/gro.c         | 18 ++++++++++++++----
 net/ipv4/tcp_offload.c | 17 +++++++++++++++--
 2 files changed, 29 insertions(+), 6 deletions(-)
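
For background, the accounting change at the heart of this patch can be
modeled in a few lines of userspace C. The sketch below is a toy model,
not kernel code: struct pkt and gro_merge() are invented stand-ins for
sk_buff and skb_gro_receive(), kept only to show why the merge must add
the incoming packet's gso_segs instead of incrementing a counter by one.

	#include <assert.h>
	#include <stdio.h>

	struct pkt {
		unsigned int len;      /* payload bytes */
		unsigned int gso_size; /* MSS the packet was built with */
		unsigned int segs;     /* gso_segs; 1 for a plain MSS-sized frame */
	};

	/* Models skb_gro_receive()'s new "NAPI_GRO_CB(p)->count += segs". */
	static void gro_merge(struct pkt *p, const struct pkt *q)
	{
		p->len  += q->len;
		p->segs += q->segs; /* the old code did p->segs++ here */
	}

	int main(void)
	{
		/* Two 3-segment HW-GRO packets built with a 1448-byte MSS. */
		struct pkt p = { 3 * 1448, 1448, 3 };
		struct pkt q = { 3 * 1448, 1448, 3 };

		gro_merge(&p, &q);

		/* Invariant relied on by the new out_check_final test: a
		 * fully packed GRO packet has len == count * gso_size. */
		assert(p.len == p.segs * p.gso_size);
		printf("merged: %u bytes, %u segs of %u\n",
		       p.len, p.segs, p.gso_size);
		return 0;
	}

With the old increment, p.segs would end up as 4 instead of 6, so a later
resegmentation (TSO) and the new full-segment check in tcp_gro_receive()
would both operate on a wrong segment count.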

Comments

Paolo Abeni Oct. 2, 2022, 2:13 p.m. UTC | #1
On Fri, 2022-09-30 at 15:09 -0700, Eric Dumazet wrote:
> From: Coco Li <lixiaoyan@google.com>
> 
> [...]

LGTM, thanks!

Acked-by: Paolo Abeni <pabeni@redhat.com>
patchwork-bot+netdevbpf@kernel.org Oct. 3, 2022, 11:40 a.m. UTC | #2
Hello:

This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Fri, 30 Sep 2022 15:09:05 -0700 you wrote:
> From: Coco Li <lixiaoyan@google.com>
> 
> The current GRO stack only supports incoming packets containing
> a single frame/MSS.
> 
> This patch changes GRO to accept packets that are already GRO packets.
> 
> [...]

Here is the summary with links:
  - [v2,net-next] gro: add support of (hw)gro packets to gro stack
    https://git.kernel.org/netdev/net-next/c/5eddb24901ee

You are awesome, thank you!

Patch

diff --git a/net/core/gro.c b/net/core/gro.c
index b4190eb084672fb4f2be8b437eccb4e8507ff63f..bc9451743307bc380cca96ae6995aa0a3b83d185 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	unsigned int gro_max_size;
 	unsigned int new_truesize;
 	struct sk_buff *lp;
+	int segs;
 
 	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
 	gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 			return -E2BIG;
 	}
 
+	segs = NAPI_GRO_CB(skb)->count;
 	lp = NAPI_GRO_CB(p)->last;
 	pinfo = skb_shinfo(lp);
 
@@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	lp = p;
 
 done:
-	NAPI_GRO_CB(p)->count++;
+	NAPI_GRO_CB(p)->count += segs;
 	p->data_len += len;
 	p->truesize += delta_truesize;
 	p->len += len;
@@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
 					 sizeof(u32))); /* Avoid slow unaligned acc */
 		*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
-		NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
+		NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
 		NAPI_GRO_CB(skb)->is_atomic = 1;
+		NAPI_GRO_CB(skb)->count = 1;
+		if (unlikely(skb_is_gso(skb))) {
+			NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+			/* Only support TCP at the moment. */
+			if (!skb_is_gso_tcp(skb))
+				NAPI_GRO_CB(skb)->flush = 1;
+		}
 
 		/* Setup for GRO checksum validation */
 		switch (skb->ip_summed) {
@@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	else
 		gro_list->count++;
 
-	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
-	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+	if (!skb_is_gso(skb))
+		skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	list_add(&skb->list, &gro_list->list);
 	ret = GRO_HELD;
 
@@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
+	skb_shinfo(skb)->gso_size = 0;
 	if (unlikely(skb->slow_gro)) {
 		skb_orphan(skb);
 		skb_ext_reset(skb);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index a844a0d38482d916251f3aca4555c75c9770820c..45dda788938704c3f762256266d9ea29b6ded4a5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	mss = skb_shinfo(p)->gso_size;
 
-	flush |= (len - 1) >= mss;
+	/* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
+	 * If it is a single frame, do not aggregate it if its length
+	 * is bigger than our mss.
+	 */
+	if (unlikely(skb_is_gso(skb)))
+		flush |= (mss != skb_shinfo(skb)->gso_size);
+	else
+		flush |= (len - 1) >= mss;
+
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 #ifdef CONFIG_TLS_DEVICE
 	flush |= p->decrypted ^ skb->decrypted;
@@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
 
 out_check_final:
-	flush = len < mss;
+	/* Force a flush if last segment is smaller than mss. */
+	if (unlikely(skb_is_gso(skb)))
+		flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
+	else
+		flush = len < mss;
+
 	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
 					TCP_FLAG_RST | TCP_FLAG_SYN |
 					TCP_FLAG_FIN));
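
To restate the two new tcp_gro_receive() flush rules outside of diff
context, here is a self-contained C sketch. It is a simplified model,
not the kernel code: the helpers take plain integers where the kernel
reads NAPI_GRO_CB(skb)->count, skb_shinfo(skb)->gso_size and friends.

	#include <assert.h>
	#include <stdbool.h>

	/* First new test: an already-aggregated (GSO) packet may only be
	 * merged if it was built with the same MSS as the packet it joins;
	 * a single frame must not be larger than that MSS. */
	static bool mss_flush(bool is_gso, unsigned int len,
			      unsigned int mss, unsigned int gso_size)
	{
		if (is_gso)
			return mss != gso_size;
		return len - 1 >= mss; /* i.e. len > mss; len == 0 also flushes */
	}

	/* New out_check_final test: keep aggregating only while every
	 * segment, including the last one, is exactly gso_size bytes. */
	static bool final_flush(bool is_gso, unsigned int len, unsigned int mss,
				unsigned int segs, unsigned int gso_size)
	{
		if (is_gso)
			return len != segs * gso_size;
		return len < mss;
	}

	int main(void)
	{
		/* A 3-segment HW-GRO packet with a matching MSS keeps flowing. */
		assert(!mss_flush(true, 3 * 1448, 1448, 1448));
		assert(!final_flush(true, 3 * 1448, 1448, 3, 1448));

		/* A short trailing segment forces a flush, exactly as a
		 * sub-MSS single frame does in the pre-existing len < mss test. */
		assert(final_flush(true, 2 * 1448 + 100, 1448, 3, 1448));
		assert(final_flush(false, 100, 1448, 1, 1448));
		return 0;
	}

The napi_reuse_skb() hunk follows from the same model: now that
dev_gro_receive() trusts gso_size on input, a recycled napi skb must not
carry a stale value into the next receive cycle.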