Message ID | 20220930220905.2019461-1-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 5eddb24901ee49eee23c0bfce6af2e83fd5679bd |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [v2,net-next] gro: add support of (hw)gro packets to gro stack | expand |
On Fri, 2022-09-30 at 15:09 -0700, Eric Dumazet wrote: > From: Coco Li <lixiaoyan@google.com> > > Current GRO stack only supports incoming packets containing > one frame/MSS. > > This patch changes GRO to accept packets that are already GRO. > > HW-GRO (aka RSC for some vendors) is very often limited in presence > of interleaved packets. Linux SW GRO stack can complete the job > and provide larger GRO packets, thus reducing rate of ACK packets > and cpu overhead. > > This also means BIG TCP can still be used, even if HW-GRO/RSC was > able to cook ~64 KB GRO packets. > > v2: fix logic in tcp_gro_receive() > > Only support TCP for the moment (Paolo) > > Co-Developed-by: Eric Dumazet <edumazet@google.com> > Signed-off-by: Eric Dumazet <edumazet@google.com> > Signed-off-by: Coco Li <lixiaoyan@google.com> > --- > net/core/gro.c | 18 ++++++++++++++---- > net/ipv4/tcp_offload.c | 17 +++++++++++++++-- > 2 files changed, 29 insertions(+), 6 deletions(-) > > diff --git a/net/core/gro.c b/net/core/gro.c > index b4190eb084672fb4f2be8b437eccb4e8507ff63f..bc9451743307bc380cca96ae6995aa0a3b83d185 100644 > --- a/net/core/gro.c > +++ b/net/core/gro.c > @@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) > unsigned int gro_max_size; > unsigned int new_truesize; > struct sk_buff *lp; > + int segs; > > /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ > gro_max_size = READ_ONCE(p->dev->gro_max_size); > @@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) > return -E2BIG; > } > > + segs = NAPI_GRO_CB(skb)->count; > lp = NAPI_GRO_CB(p)->last; > pinfo = skb_shinfo(lp); > > @@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) > lp = p; > > done: > - NAPI_GRO_CB(p)->count++; > + NAPI_GRO_CB(p)->count += segs; > p->data_len += len; > p->truesize += delta_truesize; > p->len += len; > @@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > 
BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), > sizeof(u32))); /* Avoid slow unaligned acc */ > *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; > - NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); > + NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); > NAPI_GRO_CB(skb)->is_atomic = 1; > + NAPI_GRO_CB(skb)->count = 1; > + if (unlikely(skb_is_gso(skb))) { > + NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; > + /* Only support TCP at the moment. */ > + if (!skb_is_gso_tcp(skb)) > + NAPI_GRO_CB(skb)->flush = 1; > + } > > /* Setup for GRO checksum validation */ > switch (skb->ip_summed) { > @@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > else > gro_list->count++; > > - NAPI_GRO_CB(skb)->count = 1; > NAPI_GRO_CB(skb)->age = jiffies; > NAPI_GRO_CB(skb)->last = skb; > - skb_shinfo(skb)->gso_size = skb_gro_len(skb); > + if (!skb_is_gso(skb)) > + skb_shinfo(skb)->gso_size = skb_gro_len(skb); > list_add(&skb->list, &gro_list->list); > ret = GRO_HELD; > > @@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) > > skb->encapsulation = 0; > skb_shinfo(skb)->gso_type = 0; > + skb_shinfo(skb)->gso_size = 0; > if (unlikely(skb->slow_gro)) { > skb_orphan(skb); > skb_ext_reset(skb); > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > index a844a0d38482d916251f3aca4555c75c9770820c..45dda788938704c3f762256266d9ea29b6ded4a5 100644 > --- a/net/ipv4/tcp_offload.c > +++ b/net/ipv4/tcp_offload.c > @@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) > > mss = skb_shinfo(p)->gso_size; > > - flush |= (len - 1) >= mss; > + /* If skb is a GRO packet, make sure its gso_size matches prior packet mss. > + * If it is a single frame, do not aggregate it if its length > + * is bigger than our mss. 
> + */ > + if (unlikely(skb_is_gso(skb))) > + flush |= (mss != skb_shinfo(skb)->gso_size); > + else > + flush |= (len - 1) >= mss; > + > flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); > #ifdef CONFIG_TLS_DEVICE > flush |= p->decrypted ^ skb->decrypted; > @@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) > tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); > > out_check_final: > - flush = len < mss; > + /* Force a flush if last segment is smaller than mss. */ > + if (unlikely(skb_is_gso(skb))) > + flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size; > + else > + flush = len < mss; > + > flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | > TCP_FLAG_RST | TCP_FLAG_SYN | > TCP_FLAG_FIN)); LGTM, thanks! Acked-by: Paolo Abeni <pabeni@redhat.com>
Hello:

This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Fri, 30 Sep 2022 15:09:05 -0700 you wrote:
> From: Coco Li <lixiaoyan@google.com>
>
> Current GRO stack only supports incoming packets containing
> one frame/MSS.
>
> This patch changes GRO to accept packets that are already GRO.
>
> [...]

Here is the summary with links:
  - [v2,net-next] gro: add support of (hw)gro packets to gro stack
    https://git.kernel.org/netdev/net-next/c/5eddb24901ee

You are awesome, thank you!
diff --git a/net/core/gro.c b/net/core/gro.c
index b4190eb084672fb4f2be8b437eccb4e8507ff63f..bc9451743307bc380cca96ae6995aa0a3b83d185 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	unsigned int gro_max_size;
 	unsigned int new_truesize;
 	struct sk_buff *lp;
+	int segs;
 
 	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
 	gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 		return -E2BIG;
 	}
 
+	segs = NAPI_GRO_CB(skb)->count;
 	lp = NAPI_GRO_CB(p)->last;
 	pinfo = skb_shinfo(lp);
 
@@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	lp = p;
 
 done:
-	NAPI_GRO_CB(p)->count++;
+	NAPI_GRO_CB(p)->count += segs;
 	p->data_len += len;
 	p->truesize += delta_truesize;
 	p->len += len;
@@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
 					 sizeof(u32))); /* Avoid slow unaligned acc */
 	*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
-	NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
+	NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
 	NAPI_GRO_CB(skb)->is_atomic = 1;
+	NAPI_GRO_CB(skb)->count = 1;
+	if (unlikely(skb_is_gso(skb))) {
+		NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+		/* Only support TCP at the moment. */
+		if (!skb_is_gso_tcp(skb))
+			NAPI_GRO_CB(skb)->flush = 1;
+	}
 
 	/* Setup for GRO checksum validation */
 	switch (skb->ip_summed) {
@@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	else
 		gro_list->count++;
 
-	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
-	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+	if (!skb_is_gso(skb))
+		skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	list_add(&skb->list, &gro_list->list);
 	ret = GRO_HELD;
 
@@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
+	skb_shinfo(skb)->gso_size = 0;
 	if (unlikely(skb->slow_gro)) {
 		skb_orphan(skb);
 		skb_ext_reset(skb);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index a844a0d38482d916251f3aca4555c75c9770820c..45dda788938704c3f762256266d9ea29b6ded4a5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	mss = skb_shinfo(p)->gso_size;
 
-	flush |= (len - 1) >= mss;
+	/* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
+	 * If it is a single frame, do not aggregate it if its length
+	 * is bigger than our mss.
+	 */
+	if (unlikely(skb_is_gso(skb)))
+		flush |= (mss != skb_shinfo(skb)->gso_size);
+	else
+		flush |= (len - 1) >= mss;
+
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 #ifdef CONFIG_TLS_DEVICE
 	flush |= p->decrypted ^ skb->decrypted;
@@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
 
 out_check_final:
-	flush = len < mss;
+	/* Force a flush if last segment is smaller than mss. */
+	if (unlikely(skb_is_gso(skb)))
+		flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
+	else
+		flush = len < mss;
+
 	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
 					TCP_FLAG_RST | TCP_FLAG_SYN |
 					TCP_FLAG_FIN));