Message ID | 20240426065143.4667-3-nbd@nbd.name (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add TCP fraglist GRO support | expand |
On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <nbd@nbd.name> wrote: > > Preparation for adding TCP fraglist GRO support. It expects packets to be > combined in a similar way as UDP fraglist GSO packets. > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ > net/ipv6/tcpv6_offload.c | 3 ++ > 2 files changed, 68 insertions(+) > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > index fab0973f995b..c493e95e09a5 100644 > --- a/net/ipv4/tcp_offload.c > +++ b/net/ipv4/tcp_offload.c > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > } > } > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > + __be32 *oldip, __be32 *newip, > + __be16 *oldport, __be16 *newport) Do we really need pointers for newip and newport ? > +{ > + struct tcphdr *th; > + struct iphdr *iph; > + > + if (*oldip == *newip && *oldport == *newport) > + return; > + > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); > + *oldport = *newport; > + > + csum_replace4(&iph->check, *oldip, *newip); > + *oldip = *newip; > +} > + > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > +{ > + struct sk_buff *seg; > + struct tcphdr *th, *th2; > + struct iphdr *iph, *iph2; I would probably add a const qualifier to th and iph > + > + seg = segs; > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + th2 = tcp_hdr(seg->next); > + iph2 = ip_hdr(seg->next); > + > + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) > + return segs; > + > + while ((seg = seg->next)) { > + th2 = tcp_hdr(seg); > + iph2 = ip_hdr(seg); > + > + __tcpv4_gso_segment_csum(seg, > + &iph2->saddr, &iph->saddr, > + &th2->source, 
&th->source); > + __tcpv4_gso_segment_csum(seg, > + &iph2->daddr, &iph->daddr, > + &th2->dest, &th->dest); > + } > + > + return segs; > +} >
On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote: > Preparation for adding TCP fraglist GRO support. It expects packets to be > combined in a similar way as UDP fraglist GSO packets. > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ > net/ipv6/tcpv6_offload.c | 3 ++ > 2 files changed, 68 insertions(+) > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > index fab0973f995b..c493e95e09a5 100644 > --- a/net/ipv4/tcp_offload.c > +++ b/net/ipv4/tcp_offload.c > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > } > } > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > + __be32 *oldip, __be32 *newip, > + __be16 *oldport, __be16 *newport) > +{ > + struct tcphdr *th; > + struct iphdr *iph; > + > + if (*oldip == *newip && *oldport == *newport) > + return; > + > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); > + *oldport = *newport; > + > + csum_replace4(&iph->check, *oldip, *newip); > + *oldip = *newip; > +} > + > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > +{ > + struct sk_buff *seg; > + struct tcphdr *th, *th2; > + struct iphdr *iph, *iph2; > + > + seg = segs; > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + th2 = tcp_hdr(seg->next); > + iph2 = ip_hdr(seg->next); > + > + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) > + return segs; As mentioned in previous revisions, I think a problem with this approach is that the stack could make other changes to the TCP header after the GRO stage, that are unnoticed here and could cause csum corruption, if the egress device does not recompute the packet csum. Cheers, Paolo
On 26.04.24 09:44, Eric Dumazet wrote: > On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <nbd@nbd.name> wrote: >> >> Preparation for adding TCP fraglist GRO support. It expects packets to be >> combined in a similar way as UDP fraglist GSO packets. >> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. >> >> Signed-off-by: Felix Fietkau <nbd@nbd.name> >> --- >> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ >> net/ipv6/tcpv6_offload.c | 3 ++ >> 2 files changed, 68 insertions(+) >> >> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c >> index fab0973f995b..c493e95e09a5 100644 >> --- a/net/ipv4/tcp_offload.c >> +++ b/net/ipv4/tcp_offload.c >> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, >> } >> } >> >> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, >> + __be32 *oldip, __be32 *newip, >> + __be16 *oldport, __be16 *newport) > > > Do we really need pointers for newip and newport ? > >> +{ >> + struct tcphdr *th; >> + struct iphdr *iph; >> + >> + if (*oldip == *newip && *oldport == *newport) >> + return; >> + >> + th = tcp_hdr(seg); >> + iph = ip_hdr(seg); >> + >> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); >> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); >> + *oldport = *newport; >> + >> + csum_replace4(&iph->check, *oldip, *newip); >> + *oldip = *newip; >> +} >> + >> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) >> +{ >> + struct sk_buff *seg; >> + struct tcphdr *th, *th2; >> + struct iphdr *iph, *iph2; > > I would probably add a const qualifier to th and iph Will do, thanks. - Felix
On 26.04.24 10:28, Paolo Abeni wrote: > On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote: >> Preparation for adding TCP fraglist GRO support. It expects packets to be >> combined in a similar way as UDP fraglist GSO packets. >> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. >> >> Signed-off-by: Felix Fietkau <nbd@nbd.name> >> --- >> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ >> net/ipv6/tcpv6_offload.c | 3 ++ >> 2 files changed, 68 insertions(+) >> >> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c >> index fab0973f995b..c493e95e09a5 100644 >> --- a/net/ipv4/tcp_offload.c >> +++ b/net/ipv4/tcp_offload.c >> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, >> } >> } >> >> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, >> + __be32 *oldip, __be32 *newip, >> + __be16 *oldport, __be16 *newport) >> +{ >> + struct tcphdr *th; >> + struct iphdr *iph; >> + >> + if (*oldip == *newip && *oldport == *newport) >> + return; >> + >> + th = tcp_hdr(seg); >> + iph = ip_hdr(seg); >> + >> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); >> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); >> + *oldport = *newport; >> + >> + csum_replace4(&iph->check, *oldip, *newip); >> + *oldip = *newip; >> +} >> + >> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) >> +{ >> + struct sk_buff *seg; >> + struct tcphdr *th, *th2; >> + struct iphdr *iph, *iph2; >> + >> + seg = segs; >> + th = tcp_hdr(seg); >> + iph = ip_hdr(seg); >> + th2 = tcp_hdr(seg->next); >> + iph2 = ip_hdr(seg->next); >> + >> + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && >> + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) >> + return segs; > > As mentioned in previous revisions, I think a problem with this > approach is that the stack could make other changes to the TCP header > after the GRO stage, that are unnoticed here and 
could cause csum > corruption, if the egress device does not recompute the packet csum. On segmentation, each packet keeps its original TCP header and csum. If the stack makes changes, they apply to the first packet only. I don't see how we could get csum corruption. - Felix
On Fri, 2024-04-26 at 11:39 +0200, Felix Fietkau wrote: > On 26.04.24 10:28, Paolo Abeni wrote: > > On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote: > > > Preparation for adding TCP fraglist GRO support. It expects packets to be > > > combined in a similar way as UDP fraglist GSO packets. > > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > > > > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > > > --- > > > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ > > > net/ipv6/tcpv6_offload.c | 3 ++ > > > 2 files changed, 68 insertions(+) > > > > > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > > > index fab0973f995b..c493e95e09a5 100644 > > > --- a/net/ipv4/tcp_offload.c > > > +++ b/net/ipv4/tcp_offload.c > > > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > > > } > > > } > > > > > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > > > + __be32 *oldip, __be32 *newip, > > > + __be16 *oldport, __be16 *newport) > > > +{ > > > + struct tcphdr *th; > > > + struct iphdr *iph; > > > + > > > + if (*oldip == *newip && *oldport == *newport) > > > + return; > > > + > > > + th = tcp_hdr(seg); > > > + iph = ip_hdr(seg); > > > + > > > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); > > > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); > > > + *oldport = *newport; > > > + > > > + csum_replace4(&iph->check, *oldip, *newip); > > > + *oldip = *newip; > > > +} > > > + > > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > > > +{ > > > + struct sk_buff *seg; > > > + struct tcphdr *th, *th2; > > > + struct iphdr *iph, *iph2; > > > + > > > + seg = segs; > > > + th = tcp_hdr(seg); > > > + iph = ip_hdr(seg); > > > + th2 = tcp_hdr(seg->next); > > > + iph2 = ip_hdr(seg->next); > > > + > > > + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && > > > + iph->daddr == iph2->daddr && iph->saddr == 
iph2->saddr) > > > + return segs; > > > > As mentioned in previous revisions, I think a problem with this > > approach is that the stack could make other changes to the TCP header > > after the GRO stage, that are unnoticed here and could cause csum > > corruption, if the egress device does not recompute the packet csum. > > On segmentation, each packet keeps its original TCP header and csum. If > the stack makes changes, they apply to the first packet only. I don't > see how we could get csum corruption. You are right. I did not take into account that such changes (to the first skb) are not reflected in the frag_list at segmentation time. The end result could be different from what the user/admin is expecting, but at least it should not cause drops. Side note: like UDP, this does not support IPv6 NAT... Thanks, Paolo
On 26.04.24 12:40, Paolo Abeni wrote: > On Fri, 2024-04-26 at 11:39 +0200, Felix Fietkau wrote: >> On 26.04.24 10:28, Paolo Abeni wrote: >> > On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote: >> > > Preparation for adding TCP fraglist GRO support. It expects packets to be >> > > combined in a similar way as UDP fraglist GSO packets. >> > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. >> > > >> > > Signed-off-by: Felix Fietkau <nbd@nbd.name> >> > > --- >> > > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ >> > > net/ipv6/tcpv6_offload.c | 3 ++ >> > > 2 files changed, 68 insertions(+) >> > > >> > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c >> > > index fab0973f995b..c493e95e09a5 100644 >> > > --- a/net/ipv4/tcp_offload.c >> > > +++ b/net/ipv4/tcp_offload.c >> > > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, >> > > } >> > > } >> > > >> > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, >> > > + __be32 *oldip, __be32 *newip, >> > > + __be16 *oldport, __be16 *newport) >> > > +{ >> > > + struct tcphdr *th; >> > > + struct iphdr *iph; >> > > + >> > > + if (*oldip == *newip && *oldport == *newport) >> > > + return; >> > > + >> > > + th = tcp_hdr(seg); >> > > + iph = ip_hdr(seg); >> > > + >> > > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); >> > > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); >> > > + *oldport = *newport; >> > > + >> > > + csum_replace4(&iph->check, *oldip, *newip); >> > > + *oldip = *newip; >> > > +} >> > > + >> > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) >> > > +{ >> > > + struct sk_buff *seg; >> > > + struct tcphdr *th, *th2; >> > > + struct iphdr *iph, *iph2; >> > > + >> > > + seg = segs; >> > > + th = tcp_hdr(seg); >> > > + iph = ip_hdr(seg); >> > > + th2 = tcp_hdr(seg->next); >> > > + iph2 = ip_hdr(seg->next); >> > > + >> > > + if (!(*(u32 
*)&th->source ^ *(u32 *)&th2->source) && >> > > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) >> > > + return segs; >> > >> > As mentioned in previous revisions, I think a problem with this >> > approach is that the stack could make other changes to the TCP header >> > after the GRO stage, that are unnoticed here and could cause csum >> > corruption, if the egress device does not recompute the packet csum. >> >> On segmentation, each packet keeps its original TCP header and csum. If >> the stack makes changes, they apply to the first packet only. I don't >> see how we could get csum corruption. > > You are right. I did not take into account that such changes (to the > first skb) are not reflected in the frag_list at segmentation time. The > end result could be different from what the user/admin is expecting, > but at least it should not cause drops. > > Side note: like UDP, this does not support IPv6 NAT... I will add that for both in the next version. Thanks, - Felix
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fab0973f995b..c493e95e09a5 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct tcphdr *th; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + th = tcp_hdr(seg); + iph = ip_hdr(seg); + + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct tcphdr *th, *th2; + struct iphdr *iph, *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ip_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ip_hdr(seg->next); + + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ip_hdr(seg); + + __tcpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &th2->source, &th->source); + __tcpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &th2->dest, &th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv4_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -37,6 +99,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + 
return __tcp4_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 4b07d1e6c952..b3b8e1f6b92a 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -51,6 +51,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb);
Preparation for adding TCP fraglist GRO support. It expects packets to be combined in a similar way as UDP fraglist GSO packets. For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. Signed-off-by: Felix Fietkau <nbd@nbd.name> --- net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcpv6_offload.c | 3 ++ 2 files changed, 68 insertions(+)