diff mbox series

[net-next,2/4] net: add support for segmenting TCP fraglist GSO packets

Message ID 20240424180458.56211-3-nbd@nbd.name (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Add TCP fraglist GRO support | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 932 this patch: 932
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 938 this patch: 938
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 943 this patch: 943
netdev/checkpatch warning CHECK: Unnecessary parentheses around 'flags == flags2' WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Felix Fietkau April 24, 2024, 6:04 p.m. UTC
Preparation for adding TCP fraglist GRO support. It expects packets to be
combined in a similar way as UDP fraglist GSO packets.
One difference is the fact that this code assumes that the TCP flags of
all packets have the same value. This allows simple handling of flags
mutations. For IPv4 packets, NAT is handled in the same way as UDP
fraglist GSO.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/ipv4/tcp_offload.c   | 74 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/tcpv6_offload.c | 37 ++++++++++++++++++++
 2 files changed, 111 insertions(+)

Comments

Willem de Bruijn April 25, 2024, 3:03 a.m. UTC | #1
Felix Fietkau wrote:
> Preparation for adding TCP fraglist GRO support. It expects packets to be
> combined in a similar way as UDP fraglist GSO packets.
> One difference is the fact that this code assumes that the TCP flags of
> all packets have the same value. This allows simple handling of flags
> mutations.

Can you clarify this some more? We expect potentially different flags
on first and last packet in a TSO train. With fraglist, the segments
keep their original flags, as the headers are only pulled. When do
segment flags need to be replaced with those of the first segment?

> For IPv4 packets, NAT is handled in the same way as UDP
> fraglist GSO.
> 
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> ---
>  net/ipv4/tcp_offload.c   | 74 ++++++++++++++++++++++++++++++++++++++++
>  net/ipv6/tcpv6_offload.c | 37 ++++++++++++++++++++
>  2 files changed, 111 insertions(+)
> 
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index fab0973f995b..06dbb2e2b2f3 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -28,6 +28,77 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
>  	}
>  }
>  
> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
> +				     __be32 *oldip, __be32 *newip,
> +				     __be16 *oldport, __be16 *newport)
> +{
> +	struct tcphdr *th;
> +	struct iphdr *iph;
> +
> +	if (*oldip == *newip && *oldport == *newport)
> +		return;
> +
> +	th = tcp_hdr(seg);
> +	iph = ip_hdr(seg);
> +
> +	inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
> +	inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
> +	*oldport = *newport;
> +
> +	csum_replace4(&iph->check, *oldip, *newip);
> +	*oldip = *newip;
> +}
> +
> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
> +{
> +	struct sk_buff *seg;
> +	struct tcphdr *th, *th2;
> +	struct iphdr *iph, *iph2;
> +	__be32 flags, flags2;
> +
> +	seg = segs;
> +	th = tcp_hdr(seg);
> +	iph = ip_hdr(seg);
> +	flags = tcp_flag_word(th);
> +	flags2 = tcp_flag_word(tcp_hdr(seg->next));
> +
> +	if ((tcp_hdr(seg)->dest == tcp_hdr(seg->next)->dest) &&
> +	    (tcp_hdr(seg)->source == tcp_hdr(seg->next)->source) &&
> +	    (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
> +	    (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr) &&
> +	    (flags == flags2))
> +		return segs;
> +
> +	while ((seg = seg->next)) {
> +		th2 = tcp_hdr(seg);
> +		iph2 = ip_hdr(seg);
> +
> +		__tcpv4_gso_segment_csum(seg,
> +					 &iph2->saddr, &iph->saddr,
> +					 &th2->source, &th->source);
> +		__tcpv4_gso_segment_csum(seg,
> +					 &iph2->daddr, &iph->daddr,
> +					 &th2->dest, &th->dest);
> +		if (flags == flags2)
> +			continue;
> +
> +		inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false);
> +		tcp_flag_word(th2) = flags;
> +	}
> +
> +	return segs;
> +}
> +
> +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb,
> +					      netdev_features_t features)

For consistency and to avoid having the same name in ipv6,
add the 4/6 suffix here too.

> +{
> +	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +	if (IS_ERR(skb))
> +		return skb;
> +
> +	return __tcpv4_gso_segment_list_csum(skb);
> +}
> +
>  static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
>  					netdev_features_t features)
>  {
> @@ -37,6 +108,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
>  	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
>  		return ERR_PTR(-EINVAL);
>  
> +	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
> +		return __tcp_gso_segment_list(skb, features);
> +
>  	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
>  		const struct iphdr *iph = ip_hdr(skb);
>  		struct tcphdr *th = tcp_hdr(skb);
> diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
> index 4b07d1e6c952..12fe79cb2c10 100644
> --- a/net/ipv6/tcpv6_offload.c
> +++ b/net/ipv6/tcpv6_offload.c
> @@ -40,6 +40,40 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
>  	return 0;
>  }
>  
> +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
> +{
> +	struct tcphdr *th, *th2;
> +	__be32 flags, flags2;
> +	struct sk_buff *seg;
> +
> +	seg = segs;
> +	th = tcp_hdr(seg);
> +	flags = tcp_flag_word(th);
> +	flags2 = tcp_flag_word(tcp_hdr(seg->next));
> +
> +	if (flags == flags2)
> +		return segs;
> +
> +	while ((seg = seg->next)) {
> +		th2 = tcp_hdr(seg);
> +
> +		inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false);
> +		tcp_flag_word(th2) = flags;
> +	}
> +
> +	return segs;
> +}
> +
> +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb,
> +					      netdev_features_t features)
> +{
> +	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +	if (IS_ERR(skb))
> +		return skb;
> +
> +	return __tcpv6_gso_segment_list_csum(skb);
> +}
> +
>  static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
>  					netdev_features_t features)
>  {
> @@ -51,6 +85,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
>  	if (!pskb_may_pull(skb, sizeof(*th)))
>  		return ERR_PTR(-EINVAL);
>  
> +	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
> +		return __tcp_gso_segment_list(skb, features);
> +
>  	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
>  		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
>  		struct tcphdr *th = tcp_hdr(skb);
> -- 
> 2.44.0
> 
x
Felix Fietkau April 25, 2024, 7:51 a.m. UTC | #2
On 25.04.24 05:03, Willem de Bruijn wrote:
> Felix Fietkau wrote:
>> Preparation for adding TCP fraglist GRO support. It expects packets to be
>> combined in a similar way as UDP fraglist GSO packets.
>> One difference is the fact that this code assumes that the TCP flags of
>> all packets have the same value. This allows simple handling of flags
>> mutations.
> 
> Can you clarify this some more? We expect potentially different flags
> on first and last packet in a TSO train. With fraglist, the segments
> keep their original flags, as the headers are only pulled. When do
> segment flags need to be replaced with those of the first segment?

Maybe I just misunderstood a comment that Paolo made earlier regarding 
TCP header mutations. Will review this again and compare with regular TSO.

- Felix
Paolo Abeni April 26, 2024, 8 a.m. UTC | #3
On Thu, 2024-04-25 at 09:51 +0200, Felix Fietkau wrote:
> On 25.04.24 05:03, Willem de Bruijn wrote:
> > Felix Fietkau wrote:
> > > Preparation for adding TCP fraglist GRO support. It expects packets to be
> > > combined in a similar way as UDP fraglist GSO packets.
> > > One difference is the fact that this code assumes that the TCP flags of
> > > all packets have the same value. This allows simple handling of flags
> > > mutations.
> > 
> > Can you clarify this some more? We expect potentially different flags
> > on first and last packet in a TSO train. With fraglist, the segments
> > keep their original flags, as the headers are only pulled. When do
> > segment flags need to be replaced with those of the first segment?
> 
> Maybe I just misunderstood a comment that Paolo made earlier regarding 
> TCP header mutations. Will review this again and compare with regular TSO.

I likely was not clear, I'm sorry.

Let me try to rephrase. After the GRO stage, and before segmentation,
the stack could change other fields inside the TCP header (beyond
src/dst port). For example, nftables can clear the ECN bit or strip all
the TCP options.

The frag_list segmentation should catch such changes and update the
checksum of each individual segment accordingly.

Note that even IPv6 could snat/dnat a packet!

The GRO stage allows aggregating packets with different flags. Later on,
at the segmentation stage, all the individual packets except the last one
will retain the same flags as the first segment, except for the PUSH and
FIN bits, which will be cleared. The last segment will have those bit
values preserved.

Cheers,

Paolo
diff mbox series

Patch

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fab0973f995b..06dbb2e2b2f3 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,77 @@  static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
 	}
 }
 
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+				     __be32 *oldip, __be32 *newip,
+				     __be16 *oldport, __be16 *newport)
+{
+	struct tcphdr *th;
+	struct iphdr *iph;
+
+	if (*oldip == *newip && *oldport == *newport)
+		return;
+
+	th = tcp_hdr(seg);
+	iph = ip_hdr(seg);
+
+	inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
+	inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
+	*oldport = *newport;
+
+	csum_replace4(&iph->check, *oldip, *newip);
+	*oldip = *newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+	struct sk_buff *seg;
+	struct tcphdr *th, *th2;
+	struct iphdr *iph, *iph2;
+	__be32 flags, flags2;
+
+	seg = segs;
+	th = tcp_hdr(seg);
+	iph = ip_hdr(seg);
+	flags = tcp_flag_word(th);
+	flags2 = tcp_flag_word(tcp_hdr(seg->next));
+
+	if ((tcp_hdr(seg)->dest == tcp_hdr(seg->next)->dest) &&
+	    (tcp_hdr(seg)->source == tcp_hdr(seg->next)->source) &&
+	    (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
+	    (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr) &&
+	    (flags == flags2))
+		return segs;
+
+	while ((seg = seg->next)) {
+		th2 = tcp_hdr(seg);
+		iph2 = ip_hdr(seg);
+
+		__tcpv4_gso_segment_csum(seg,
+					 &iph2->saddr, &iph->saddr,
+					 &th2->source, &th->source);
+		__tcpv4_gso_segment_csum(seg,
+					 &iph2->daddr, &iph->daddr,
+					 &th2->dest, &th->dest);
+		if (flags == flags2)
+			continue;
+
+		inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false);
+		tcp_flag_word(th2) = flags;
+	}
+
+	return segs;
+}
+
+static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (IS_ERR(skb))
+		return skb;
+
+	return __tcpv4_gso_segment_list_csum(skb);
+}
+
 static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
 					netdev_features_t features)
 {
@@ -37,6 +108,9 @@  static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		return ERR_PTR(-EINVAL);
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+		return __tcp_gso_segment_list(skb, features);
+
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		const struct iphdr *iph = ip_hdr(skb);
 		struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 4b07d1e6c952..12fe79cb2c10 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -40,6 +40,40 @@  INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 	return 0;
 }
 
+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+	struct tcphdr *th, *th2;
+	__be32 flags, flags2;
+	struct sk_buff *seg;
+
+	seg = segs;
+	th = tcp_hdr(seg);
+	flags = tcp_flag_word(th);
+	flags2 = tcp_flag_word(tcp_hdr(seg->next));
+
+	if (flags == flags2)
+		return segs;
+
+	while ((seg = seg->next)) {
+		th2 = tcp_hdr(seg);
+
+		inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false);
+		tcp_flag_word(th2) = flags;
+	}
+
+	return segs;
+}
+
+static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (IS_ERR(skb))
+		return skb;
+
+	return __tcpv6_gso_segment_list_csum(skb);
+}
+
 static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
 					netdev_features_t features)
 {
@@ -51,6 +85,9 @@  static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return ERR_PTR(-EINVAL);
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+		return __tcp_gso_segment_list(skb, features);
+
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 		struct tcphdr *th = tcp_hdr(skb);