diff mbox series

[net,v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

Message ID 1610716836-140533-1-git-send-email-dseok.yi@samsung.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net,v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 306 this patch: 306
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 87 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 414 this patch: 414
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Dongseok Yi Jan. 15, 2021, 1:20 p.m. UTC
UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list but copy
only the MAC header.

Update dport, daddr and checksums of each skb of the segment list
in __udp_gso_segment_list. It covers both SNAT and DNAT.

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
---
v1:
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/
I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only.

v2:
Per Steffen Klassert request, move the procedure from
udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.

To Alexander Lobakin, I've checked your email late. Just use this
patch as a reference. It support SNAT too, but does not support IPv6
yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
to the file is in IPv4 directory.

 include/net/udp.h      |  2 +-
 net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
 net/ipv6/udp_offload.c |  2 +-
 3 files changed, 59 insertions(+), 7 deletions(-)

Comments

Alexander Lobakin Jan. 15, 2021, 5:12 p.m. UTC | #1
From: Dongseok Yi <dseok.yi@samsung.com>
Date: Fri, 15 Jan 2021 22:20:35 +0900

> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> skb_gso_segment is updated but following frag_skbs are not updated.
> 
> A call path skb_mac_gso_segment -> inet_gso_segment ->
> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> does not try to update UDP/IP header of the segment list but copy
> only the MAC header.
> 
> Update dport, daddr and checksums of each skb of the segment list
> in __udp_gso_segment_list. It covers both SNAT and DNAT.
> 
> Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> ---
> v1:
> Steffen Klassert said, there could be 2 options.
> https://lore.kernel.org/patchwork/patch/1362257/
> I was trying to write a quick fix, but it was not easy to forward
> segmented list. Currently, assuming DNAT only.
> 
> v2:
> Per Steffen Klassert request, move the procedure from
> udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> 
> To Alexander Lobakin, I've checked your email late. Just use this
> patch as a reference. It support SNAT too, but does not support IPv6
> yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> to the file is in IPv4 directory.

I used another approach, tried to make fraglist GRO closer to plain
in terms of checksummming, as it is confusing to me why GSO packet
should have CHECKSUM_UNNECESSARY. Just let Netfilter do its mangling,
and then use classic UDP GSO magic at the end of segmentation.
I also see the idea of explicit comparing and editing of IP and UDP
headers right in __udp_gso_segment_list() rather unacceptable.

Dongseok, Steffen, please test this WIP diff and tell if this one
works for you, so I could clean up the code and make a patch.
For me, it works now in any configurations, with and without
checksum/GSO/fraglist offload.

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a6f262636a..646a42e88e83 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3674,6 +3674,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 				 unsigned int offset)
 {
 	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int delta_truesize = 0;
 	unsigned int delta_len = 0;
@@ -3681,7 +3682,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 	struct sk_buff *nskb, *tmp;
 	int err;
 
-	skb_push(skb, -skb_network_offset(skb) + offset);
+	skb_push(skb, doffset);
 
 	skb_shinfo(skb)->frag_list = NULL;
 
@@ -3716,12 +3717,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 		delta_len += nskb->len;
 		delta_truesize += nskb->truesize;
 
-		skb_push(nskb, -skb_network_offset(nskb) + offset);
+		skb_push(nskb, skb_headroom(nskb) - skb_headroom(skb));
 
 		skb_release_head_state(nskb);
-		 __copy_skb_header(nskb, skb);
+		__copy_skb_header(nskb, skb);
 
-		skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
 		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
 						 nskb->data - tnl_hlen,
 						 offset + tnl_hlen);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94781bf..61665fcd8c85 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -190,13 +190,58 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
 static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
 					      netdev_features_t features)
 {
-	unsigned int mss = skb_shinfo(skb)->gso_size;
+	struct sk_buff *seg;
+	struct udphdr *uh;
+	unsigned int mss;
+	__be16 newlen;
+	__sum16 check;
+
+	mss = skb_shinfo(skb)->gso_size;
+	if (skb->len <= sizeof(*uh) + mss)
+		return ERR_PTR(-EINVAL);
 
-	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	skb_pull(skb, sizeof(*uh));
+
+	skb = skb_segment_list(skb, features, skb->data - skb_mac_header(skb));
 	if (IS_ERR(skb))
 		return skb;
 
-	udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+	seg = skb;
+	uh = udp_hdr(seg);
+
+	/* compute checksum adjustment based on old length versus new */
+	newlen = htons(sizeof(*uh) + mss);
+	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+	for (;;) {
+		if (!seg->next)
+			break;
+
+		uh->len = newlen;
+		uh->check = check;
+
+		if (seg->ip_summed == CHECKSUM_PARTIAL)
+			gso_reset_checksum(seg, ~check);
+		else
+			uh->check = gso_make_checksum(seg, ~check) ? :
+				    CSUM_MANGLED_0;
+
+		seg = seg->next;
+		uh = udp_hdr(seg);
+	}
+
+	/* last packet can be partial gso_size, account for that in checksum */
+	newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
+		       seg->data_len);
+	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
+
+	uh->len = newlen;
+	uh->check = check;
+
+	if (seg->ip_summed == CHECKSUM_PARTIAL)
+		gso_reset_checksum(seg, ~check);
+	else
+		uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
 
 	return skb;
 }
@@ -602,27 +647,13 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (NAPI_GRO_CB(skb)->is_flist) {
-		uh->len = htons(skb->len - nhoff);
-
-		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
-		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-
-		if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-			if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
-				skb->csum_level++;
-		} else {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			skb->csum_level = 0;
-		}
-
-		return 0;
-	}
-
 	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
 
+	if (NAPI_GRO_CB(skb)->is_flist)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST;
+
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
Dongseok Yi Jan. 17, 2021, 11:55 p.m. UTC | #2
On 2021-01-16 02:12, Alexander Lobakin wrote:
> From: Dongseok Yi <dseok.yi@samsung.com>
> Date: Fri, 15 Jan 2021 22:20:35 +0900
> 
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
> 
> I used another approach, tried to make fraglist GRO closer to plain
> in terms of checksummming, as it is confusing to me why GSO packet
> should have CHECKSUM_UNNECESSARY. Just let Netfilter do its mangling,
> and then use classic UDP GSO magic at the end of segmentation.
> I also see the idea of explicit comparing and editing of IP and UDP
> headers right in __udp_gso_segment_list() rather unacceptable.

If I understand UDP GRO fraglist correctly, it keeps the length of
each skb of the fraglist. But your approach might change the lengths
by gso_size. What if each skb of the fraglist had different lengths?

For CHECKSUM_UNNECESSARY, GROed head_skb might have an invalid
checksum. But finally, the fraglist will be segmented to queue to
sk_receive_queue with head_skb. We could pass the GROed head_skb with
CHECKSUM_UNNECESSARY.

> 
> Dongseok, Steffen, please test this WIP diff and tell if this one
> works for you, so I could clean up the code and make a patch.
> For me, it works now in any configurations, with and without
> checksum/GSO/fraglist offload.
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index c1a6f262636a..646a42e88e83 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3674,6 +3674,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>  				 unsigned int offset)
>  {
>  	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> +	unsigned int doffset = skb->data - skb_mac_header(skb);
>  	unsigned int tnl_hlen = skb_tnl_header_len(skb);
>  	unsigned int delta_truesize = 0;
>  	unsigned int delta_len = 0;
> @@ -3681,7 +3682,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>  	struct sk_buff *nskb, *tmp;
>  	int err;
> 
> -	skb_push(skb, -skb_network_offset(skb) + offset);
> +	skb_push(skb, doffset);
> 
>  	skb_shinfo(skb)->frag_list = NULL;
> 
> @@ -3716,12 +3717,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>  		delta_len += nskb->len;
>  		delta_truesize += nskb->truesize;
> 
> -		skb_push(nskb, -skb_network_offset(nskb) + offset);
> +		skb_push(nskb, skb_headroom(nskb) - skb_headroom(skb));
> 
>  		skb_release_head_state(nskb);
> -		 __copy_skb_header(nskb, skb);
> +		__copy_skb_header(nskb, skb);
> 
> -		skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
>  		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
>  						 nskb->data - tnl_hlen,
>  						 offset + tnl_hlen);
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94781bf..61665fcd8c85 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -190,13 +190,58 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
>  static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
>  					      netdev_features_t features)
>  {
> -	unsigned int mss = skb_shinfo(skb)->gso_size;
> +	struct sk_buff *seg;
> +	struct udphdr *uh;
> +	unsigned int mss;
> +	__be16 newlen;
> +	__sum16 check;
> +
> +	mss = skb_shinfo(skb)->gso_size;
> +	if (skb->len <= sizeof(*uh) + mss)
> +		return ERR_PTR(-EINVAL);
> 
> -	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +	skb_pull(skb, sizeof(*uh));
> +
> +	skb = skb_segment_list(skb, features, skb->data - skb_mac_header(skb));
>  	if (IS_ERR(skb))
>  		return skb;
> 
> -	udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> +	seg = skb;
> +	uh = udp_hdr(seg);
> +
> +	/* compute checksum adjustment based on old length versus new */
> +	newlen = htons(sizeof(*uh) + mss);
> +	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
> +
> +	for (;;) {
> +		if (!seg->next)
> +			break;
> +
> +		uh->len = newlen;
> +		uh->check = check;
> +
> +		if (seg->ip_summed == CHECKSUM_PARTIAL)
> +			gso_reset_checksum(seg, ~check);
> +		else
> +			uh->check = gso_make_checksum(seg, ~check) ? :
> +				    CSUM_MANGLED_0;
> +
> +		seg = seg->next;
> +		uh = udp_hdr(seg);
> +	}
> +
> +	/* last packet can be partial gso_size, account for that in checksum */
> +	newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
> +		       seg->data_len);
> +	check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
> +
> +	uh->len = newlen;
> +	uh->check = check;
> +
> +	if (seg->ip_summed == CHECKSUM_PARTIAL)
> +		gso_reset_checksum(seg, ~check);
> +	else
> +		uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
> 
>  	return skb;
>  }
> @@ -602,27 +647,13 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
>  	const struct iphdr *iph = ip_hdr(skb);
>  	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
> 
> -	if (NAPI_GRO_CB(skb)->is_flist) {
> -		uh->len = htons(skb->len - nhoff);
> -
> -		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> -		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> -
> -		if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
> -			if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
> -				skb->csum_level++;
> -		} else {
> -			skb->ip_summed = CHECKSUM_UNNECESSARY;
> -			skb->csum_level = 0;
> -		}
> -
> -		return 0;
> -	}
> -
>  	if (uh->check)
>  		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
>  					  iph->daddr, 0);
> 
> +	if (NAPI_GRO_CB(skb)->is_flist)
> +		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST;
> +
>  	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
>  }
>
Steffen Klassert Jan. 18, 2021, 6:37 a.m. UTC | #3
On Fri, Jan 15, 2021 at 05:12:33PM +0000, Alexander Lobakin wrote:
> From: Dongseok Yi <dseok.yi@samsung.com>
> Date: Fri, 15 Jan 2021 22:20:35 +0900
> 
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> > 
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> > 
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > 
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> > 
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > 
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
> 
> I used another approach, tried to make fraglist GRO closer to plain
> in terms of checksummming, as it is confusing to me why GSO packet
> should have CHECKSUM_UNNECESSARY.

This is intentional. With fraglist GRO, we don't mangle packets
in the standard (non NAT) case. So the checksum is still correct
after segmentation. That is one reason why it has good forwarding
performance when software segmentation is needed. Checksuming
touches the whole packet and has a lot of overhead, so it is
heplfull to avoid it whenever possible.

We should find a way to do the checksum only when we really
need it. I.e. only if the headers of the head skb changed.
Dongseok Yi Jan. 18, 2021, 7:23 a.m. UTC | #4
On 2021-01-18 15:37, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 05:12:33PM +0000, Alexander Lobakin wrote:
> > From: Dongseok Yi <dseok.yi@samsung.com>
> > Date: Fri, 15 Jan 2021 22:20:35 +0900
> >
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list but copy
> > > only the MAC header.
> > >
> > > Update dport, daddr and checksums of each skb of the segment list
> > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > >
> > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > > ---
> > > v1:
> > > Steffen Klassert said, there could be 2 options.
> > > https://lore.kernel.org/patchwork/patch/1362257/
> > > I was trying to write a quick fix, but it was not easy to forward
> > > segmented list. Currently, assuming DNAT only.
> > >
> > > v2:
> > > Per Steffen Klassert request, move the procedure from
> > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > >
> > > To Alexander Lobakin, I've checked your email late. Just use this
> > > patch as a reference. It support SNAT too, but does not support IPv6
> > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > to the file is in IPv4 directory.
> >
> > I used another approach, tried to make fraglist GRO closer to plain
> > in terms of checksummming, as it is confusing to me why GSO packet
> > should have CHECKSUM_UNNECESSARY.
> 
> This is intentional. With fraglist GRO, we don't mangle packets
> in the standard (non NAT) case. So the checksum is still correct
> after segmentation. That is one reason why it has good forwarding
> performance when software segmentation is needed. Checksuming
> touches the whole packet and has a lot of overhead, so it is
> heplfull to avoid it whenever possible.
> 
> We should find a way to do the checksum only when we really
> need it. I.e. only if the headers of the head skb changed.

It would be not easy to detect if the skb is mangled by netfilter. I
think v2 patch has little impact on the performance. Can you suggest
an another version? If not, I can make v3 including 80 columns
warning fix.
Alexander Lobakin Jan. 18, 2021, 12:17 p.m. UTC | #5
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Mon, 18 Jan 2021 07:37:59 +0100
>
> On Fri, Jan 15, 2021 at 05:12:33PM +0000, Alexander Lobakin wrote:
>> From: Dongseok Yi <dseok.yi@samsung.com>
>> Date: Fri, 15 Jan 2021 22:20:35 +0900
>>
>>> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
>>> forwarding. Only the header of head_skb from ip_finish_output_gso ->
>>> skb_gso_segment is updated but following frag_skbs are not updated.
>>>
>>> A call path skb_mac_gso_segment -> inet_gso_segment ->
>>> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
>>> does not try to update UDP/IP header of the segment list but copy
>>> only the MAC header.
>>>
>>> Update dport, daddr and checksums of each skb of the segment list
>>> in __udp_gso_segment_list. It covers both SNAT and DNAT.
>>>
>>> Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
>>> Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
>>> ---
>>> v1:
>>> Steffen Klassert said, there could be 2 options.
>>> https://lore.kernel.org/patchwork/patch/1362257/
>>> I was trying to write a quick fix, but it was not easy to forward
>>> segmented list. Currently, assuming DNAT only.
>>>
>>> v2:
>>> Per Steffen Klassert request, move the procedure from
>>> udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
>>>
>>> To Alexander Lobakin, I've checked your email late. Just use this
>>> patch as a reference. It support SNAT too, but does not support IPv6
>>> yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
>>> to the file is in IPv4 directory.
>>
>> I used another approach, tried to make fraglist GRO closer to plain
>> in terms of checksummming, as it is confusing to me why GSO packet
>> should have CHECKSUM_UNNECESSARY.
>
> This is intentional. With fraglist GRO, we don't mangle packets
> in the standard (non NAT) case. So the checksum is still correct
> after segmentation. That is one reason why it has good forwarding
> performance when software segmentation is needed. Checksuming
> touches the whole packet and has a lot of overhead, so it is
> heplfull to avoid it whenever possible.
>
> We should find a way to do the checksum only when we really
> need it. I.e. only if the headers of the head skb changed.

I suggest to do memcmp() between skb_network_header(skb) and
skb_network_header(skb->frag_list) with the len of
skb->data - skb_network_header(skb). This way we will detect changes
in IPv4/IPv6 and UDP headers.
If so, copy the full headers and fall back to the standard checksum,
recalculation, else use the current path.

Al
Steffen Klassert Jan. 18, 2021, 12:58 p.m. UTC | #6
On Mon, Jan 18, 2021 at 12:17:34PM +0000, Alexander Lobakin wrote:
> > From: Steffen Klassert <steffen.klassert@secunet.com>
> > Date: Mon, 18 Jan 2021 07:37:59 +0100
> > On Fri, Jan 15, 2021 at 05:12:33PM +0000, Alexander Lobakin wrote:
> >>
> >> I used another approach, tried to make fraglist GRO closer to plain
> >> in terms of checksummming, as it is confusing to me why GSO packet
> >> should have CHECKSUM_UNNECESSARY.
> >
> > This is intentional. With fraglist GRO, we don't mangle packets
> > in the standard (non NAT) case. So the checksum is still correct
> > after segmentation. That is one reason why it has good forwarding
> > performance when software segmentation is needed. Checksuming
> > touches the whole packet and has a lot of overhead, so it is
> > heplfull to avoid it whenever possible.
> >
> > We should find a way to do the checksum only when we really
> > need it. I.e. only if the headers of the head skb changed.
> 
> I suggest to do memcmp() between skb_network_header(skb) and
> skb_network_header(skb->frag_list) with the len of
> skb->data - skb_network_header(skb). This way we will detect changes
> in IPv4/IPv6 and UDP headers.

I thought about that too. Bbut with fraglist GRO, the length of
the packets can vary. Unlike standard GRO, there is no requirement
that the packets in the fraglist must be equal in length here. So
we can't compare the full headers. I think we need to test for
addresses and ports.

> If so, copy the full headers and fall back to the standard checksum,
> recalculation, else use the current path.

I agree that we should fallback to standard checksum recalculation
if the addresses or ports changed.
Steffen Klassert Jan. 18, 2021, 1:27 p.m. UTC | #7
On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> skb_gso_segment is updated but following frag_skbs are not updated.
> 
> A call path skb_mac_gso_segment -> inet_gso_segment ->
> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> does not try to update UDP/IP header of the segment list but copy
> only the MAC header.
> 
> Update dport, daddr and checksums of each skb of the segment list
> in __udp_gso_segment_list. It covers both SNAT and DNAT.
> 
> Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> ---
> v1:
> Steffen Klassert said, there could be 2 options.
> https://lore.kernel.org/patchwork/patch/1362257/
> I was trying to write a quick fix, but it was not easy to forward
> segmented list. Currently, assuming DNAT only.
> 
> v2:
> Per Steffen Klassert request, move the procedure from
> udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> 
> To Alexander Lobakin, I've checked your email late. Just use this
> patch as a reference. It support SNAT too, but does not support IPv6
> yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> to the file is in IPv4 directory.
> 
>  include/net/udp.h      |  2 +-
>  net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
>  net/ipv6/udp_offload.c |  2 +-
>  3 files changed, 59 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/udp.h b/include/net/udp.h
> index 877832b..01351ba 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>  
>  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> -				  netdev_features_t features);
> +				  netdev_features_t features, bool is_ipv6);
>  
>  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
>  {
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94..c532d3b 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(skb_udp_tunnel_segment);
>  
> +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> +				     __be32 *oldip, __be32 *newip,
> +				     __be16 *oldport, __be16 *newport)
> +{
> +	struct udphdr *uh = udp_hdr(seg);
> +	struct iphdr *iph = ip_hdr(seg);
> +
> +	if (uh->check) {
> +		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> +					 true);
> +		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> +					 false);
> +		if (!uh->check)
> +			uh->check = CSUM_MANGLED_0;
> +	}
> +	uh->dest = *newport;
> +
> +	csum_replace4(&iph->check, *oldip, *newip);
> +	iph->daddr = *newip;
> +}

Can't we just do the checksum recalculation for this case as it is done
with standard GRO?

> +
> +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> +{
> +	struct sk_buff *seg;
> +	struct udphdr *uh, *uh2;
> +	struct iphdr *iph, *iph2;
> +
> +	seg = segs;
> +	uh = udp_hdr(seg);
> +	iph = ip_hdr(seg);
> +
> +	while ((seg = seg->next)) {
> +		uh2 = udp_hdr(seg);
> +		iph2 = ip_hdr(seg);
> +
> +		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> +			__udpv4_gso_segment_csum(seg,
> +						 &iph2->saddr, &iph->saddr,
> +						 &uh2->source, &uh->source);
> +
> +		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> +			__udpv4_gso_segment_csum(seg,
> +						 &iph2->daddr, &iph->daddr,
> +						 &uh2->dest, &uh->dest);
> +	}

You don't need to check the addresses and ports of all packets in the
segment list. If the addresses and ports are equal for the first and
second packet in the list, then this also holds for the rest of the
packets.
Dongseok Yi Jan. 20, 2021, 6:55 a.m. UTC | #8
On 2021-01-18 22:27, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
> >
> >  include/net/udp.h      |  2 +-
> >  net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
> >  net/ipv6/udp_offload.c |  2 +-
> >  3 files changed, 59 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/udp.h b/include/net/udp.h
> > index 877832b..01351ba 100644
> > --- a/include/net/udp.h
> > +++ b/include/net/udp.h
> > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> >
> >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > -				  netdev_features_t features);
> > +				  netdev_features_t features, bool is_ipv6);
> >
> >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> >  {
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94..c532d3b 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> >  }
> >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >
> > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > +				     __be32 *oldip, __be32 *newip,
> > +				     __be16 *oldport, __be16 *newport)
> > +{
> > +	struct udphdr *uh = udp_hdr(seg);
> > +	struct iphdr *iph = ip_hdr(seg);
> > +
> > +	if (uh->check) {
> > +		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > +					 true);
> > +		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > +					 false);
> > +		if (!uh->check)
> > +			uh->check = CSUM_MANGLED_0;
> > +	}
> > +	uh->dest = *newport;
> > +
> > +	csum_replace4(&iph->check, *oldip, *newip);
> > +	iph->daddr = *newip;
> > +}
> 
> Can't we just do the checksum recalculation for this case as it is done
> with standard GRO?

If I understand standard GRO correctly, it calculates full checksum.
Should we adopt the same method to UDP GRO fraglist? I did not find
a simple method for the incremental checksum update.

> 
> > +
> > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > +{
> > +	struct sk_buff *seg;
> > +	struct udphdr *uh, *uh2;
> > +	struct iphdr *iph, *iph2;
> > +
> > +	seg = segs;
> > +	uh = udp_hdr(seg);
> > +	iph = ip_hdr(seg);
> > +
> > +	while ((seg = seg->next)) {
> > +		uh2 = udp_hdr(seg);
> > +		iph2 = ip_hdr(seg);
> > +
> > +		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > +			__udpv4_gso_segment_csum(seg,
> > +						 &iph2->saddr, &iph->saddr,
> > +						 &uh2->source, &uh->source);
> > +
> > +		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> > +			__udpv4_gso_segment_csum(seg,
> > +						 &iph2->daddr, &iph->daddr,
> > +						 &uh2->dest, &uh->dest);
> > +	}
> 
> You don't need to check the addresses and ports of all packets in the
> segment list. If the addresses and ports are equal for the first and
> second packet in the list, then this also holds for the rest of the
> packets.

I think we can try this with an additional flag (seg_csum).

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 36b7e30..3f892df 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -213,25 +213,36 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
        struct sk_buff *seg;
        struct udphdr *uh, *uh2;
        struct iphdr *iph, *iph2;
+       bool seg_csum = false;

        seg = segs;
        uh = udp_hdr(seg);
        iph = ip_hdr(seg);

-       while ((seg = seg->next)) {
+       seg = seg->next;
+       do {
+               if (!seg)
+                       break;
+
                uh2 = udp_hdr(seg);
                iph2 = ip_hdr(seg);

-               if (uh->source != uh2->source || iph->saddr != iph2->saddr)
+               if (uh->source != uh2->source || iph->saddr != iph2->saddr) {
                        __udpv4_gso_segment_csum(seg,
                                                 &iph2->saddr, &iph->saddr,
                                                 &uh2->source, &uh->source);
+                       seg_csum = true;
+               }

-               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
+               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr) {
                        __udpv4_gso_segment_csum(seg,
                                                 &iph2->daddr, &iph->daddr,
                                                 &uh2->dest, &uh->dest);
-       }
+                       seg_csum = true;
+               }
+
+               seg = seg->next;
+       } while (seg_csum);

        return segs;
 }
Dongseok Yi Jan. 21, 2021, 12:13 p.m. UTC | #9
On 2021-01-20 15:56, Dongseok Yi wrote:
> On 2021-01-18 22:27, Steffen Klassert wrote:
> > On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list but copy
> > > only the MAC header.
> > >
> > > Update dport, daddr and checksums of each skb of the segment list
> > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > >
> > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > > ---
> > > v1:
> > > Steffen Klassert said, there could be 2 options.
> > > https://lore.kernel.org/patchwork/patch/1362257/
> > > I was trying to write a quick fix, but it was not easy to forward
> > > segmented list. Currently, assuming DNAT only.
> > >
> > > v2:
> > > Per Steffen Klassert request, move the procedure from
> > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > >
> > > To Alexander Lobakin, I've checked your email late. Just use this
> > > patch as a reference. It support SNAT too, but does not support IPv6
> > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > to the file is in IPv4 directory.
> > >
> > >  include/net/udp.h      |  2 +-
> > >  net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
> > >  net/ipv6/udp_offload.c |  2 +-
> > >  3 files changed, 59 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > index 877832b..01351ba 100644
> > > --- a/include/net/udp.h
> > > +++ b/include/net/udp.h
> > > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> > >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> > >
> > >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > > -				  netdev_features_t features);
> > > +				  netdev_features_t features, bool is_ipv6);
> > >
> > >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> > >  {
> > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > > index ff39e94..c532d3b 100644
> > > --- a/net/ipv4/udp_offload.c
> > > +++ b/net/ipv4/udp_offload.c
> > > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> > >  }
> > >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > >
> > > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > > +				     __be32 *oldip, __be32 *newip,
> > > +				     __be16 *oldport, __be16 *newport)
> > > +{
> > > +	struct udphdr *uh = udp_hdr(seg);
> > > +	struct iphdr *iph = ip_hdr(seg);
> > > +
> > > +	if (uh->check) {
> > > +		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > > +					 true);
> > > +		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > > +					 false);
> > > +		if (!uh->check)
> > > +			uh->check = CSUM_MANGLED_0;
> > > +	}
> > > +	uh->dest = *newport;
> > > +
> > > +	csum_replace4(&iph->check, *oldip, *newip);
> > > +	iph->daddr = *newip;
> > > +}
> >
> > Can't we just do the checksum recalculation for this case as it is done
> > with standard GRO?
> 
> If I understand standard GRO correctly, it calculates full checksum.
> Should we adopt the same method to UDP GRO fraglist? I did not find
> a simple method for the incremental checksum update.
> 
> >
> > > +
> > > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > > +{
> > > +	struct sk_buff *seg;
> > > +	struct udphdr *uh, *uh2;
> > > +	struct iphdr *iph, *iph2;
> > > +
> > > +	seg = segs;
> > > +	uh = udp_hdr(seg);
> > > +	iph = ip_hdr(seg);
> > > +
> > > +	while ((seg = seg->next)) {
> > > +		uh2 = udp_hdr(seg);
> > > +		iph2 = ip_hdr(seg);
> > > +
> > > +		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > > +			__udpv4_gso_segment_csum(seg,
> > > +						 &iph2->saddr, &iph->saddr,
> > > +						 &uh2->source, &uh->source);
> > > +
> > > +		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> > > +			__udpv4_gso_segment_csum(seg,
> > > +						 &iph2->daddr, &iph->daddr,
> > > +						 &uh2->dest, &uh->dest);
> > > +	}
> >
> > You don't need to check the addresses and ports of all packets in the
> > segment list. If the addresses and ports are equal for the first and
> > second packet in the list, then this also holds for the rest of the
> > packets.
> 
> I think we can try this with an additional flag (seg_csum).
> 
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 36b7e30..3f892df 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -213,25 +213,36 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
>         struct sk_buff *seg;
>         struct udphdr *uh, *uh2;
>         struct iphdr *iph, *iph2;
> +       bool seg_csum = false;
> 
>         seg = segs;
>         uh = udp_hdr(seg);
>         iph = ip_hdr(seg);
> 
> -       while ((seg = seg->next)) {
> +       seg = seg->next;
> +       do {
> +               if (!seg)
> +                       break;
> +
>                 uh2 = udp_hdr(seg);
>                 iph2 = ip_hdr(seg);
> 
> -               if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> +               if (uh->source != uh2->source || iph->saddr != iph2->saddr) {
>                         __udpv4_gso_segment_csum(seg,
>                                                  &iph2->saddr, &iph->saddr,
>                                                  &uh2->source, &uh->source);
> +                       seg_csum = true;
> +               }
> 
> -               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> +               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr) {
>                         __udpv4_gso_segment_csum(seg,
>                                                  &iph2->daddr, &iph->daddr,
>                                                  &uh2->dest, &uh->dest);
> -       }
> +                       seg_csum = true;
> +               }
> +
> +               seg = seg->next;
> +       } while (seg_csum);
> 
>         return segs;
>  }

Hi, Steffen
Do you have any further comments for this? I think the bug fix has
higher priority than optimization. Can we defer the optimization
patch?
Steffen Klassert Jan. 21, 2021, 12:28 p.m. UTC | #10
On Wed, Jan 20, 2021 at 03:55:42PM +0900, Dongseok Yi wrote:
> On 2021-01-18 22:27, Steffen Klassert wrote:
> > On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list but copy
> > > only the MAC header.
> > >
> > > Update dport, daddr and checksums of each skb of the segment list
> > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > >
> > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > > ---
> > > v1:
> > > Steffen Klassert said, there could be 2 options.
> > > https://lore.kernel.org/patchwork/patch/1362257/
> > > I was trying to write a quick fix, but it was not easy to forward
> > > segmented list. Currently, assuming DNAT only.
> > >
> > > v2:
> > > Per Steffen Klassert request, move the procedure from
> > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > >
> > > To Alexander Lobakin, I've checked your email late. Just use this
> > > patch as a reference. It support SNAT too, but does not support IPv6
> > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > to the file is in IPv4 directory.
> > >
> > >  include/net/udp.h      |  2 +-
> > >  net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
> > >  net/ipv6/udp_offload.c |  2 +-
> > >  3 files changed, 59 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > index 877832b..01351ba 100644
> > > --- a/include/net/udp.h
> > > +++ b/include/net/udp.h
> > > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> > >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> > >
> > >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > > -				  netdev_features_t features);
> > > +				  netdev_features_t features, bool is_ipv6);
> > >
> > >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> > >  {
> > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > > index ff39e94..c532d3b 100644
> > > --- a/net/ipv4/udp_offload.c
> > > +++ b/net/ipv4/udp_offload.c
> > > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> > >  }
> > >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > >
> > > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > > +				     __be32 *oldip, __be32 *newip,
> > > +				     __be16 *oldport, __be16 *newport)
> > > +{
> > > +	struct udphdr *uh = udp_hdr(seg);
> > > +	struct iphdr *iph = ip_hdr(seg);
> > > +
> > > +	if (uh->check) {
> > > +		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > > +					 true);
> > > +		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > > +					 false);
> > > +		if (!uh->check)
> > > +			uh->check = CSUM_MANGLED_0;
> > > +	}
> > > +	uh->dest = *newport;
> > > +
> > > +	csum_replace4(&iph->check, *oldip, *newip);
> > > +	iph->daddr = *newip;
> > > +}
> > 
> > Can't we just do the checksum recalculation for this case as it is done
> > with standard GRO?
> 
> If I understand standard GRO correctly, it calculates full checksum.
> Should we adopt the same method to UDP GRO fraglist? I did not find
> a simple method for the incremental checksum update.
> 
> > 
> > > +
> > > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > > +{
> > > +	struct sk_buff *seg;
> > > +	struct udphdr *uh, *uh2;
> > > +	struct iphdr *iph, *iph2;
> > > +
> > > +	seg = segs;
> > > +	uh = udp_hdr(seg);
> > > +	iph = ip_hdr(seg);
> > > +
> > > +	while ((seg = seg->next)) {
> > > +		uh2 = udp_hdr(seg);
> > > +		iph2 = ip_hdr(seg);
> > > +
> > > +		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > > +			__udpv4_gso_segment_csum(seg,
> > > +						 &iph2->saddr, &iph->saddr,
> > > +						 &uh2->source, &uh->source);
> > > +
> > > +		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> > > +			__udpv4_gso_segment_csum(seg,
> > > +						 &iph2->daddr, &iph->daddr,
> > > +						 &uh2->dest, &uh->dest);
> > > +	}


> > 
> > You don't need to check the addresses and ports of all packets in the
> > segment list. If the addresses and ports are equal for the first and
> > second packet in the list, then this also holds for the rest of the
> > packets.
> 
> I think we can try this with an additional flag (seg_csum).
> 
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 36b7e30..3f892df 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -213,25 +213,36 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
>         struct sk_buff *seg;
>         struct udphdr *uh, *uh2;
>         struct iphdr *iph, *iph2;
> +       bool seg_csum = false;
> 
>         seg = segs;
>         uh = udp_hdr(seg);
>         iph = ip_hdr(seg);

Why not

       if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
           (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
           (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
           (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
	   	return segs;

Then you don't need to test inside the loop. Just update all
packets if there is a header mismatch.

> 
> -       while ((seg = seg->next)) {
> +       seg = seg->next;
> +       do {
> +               if (!seg)
> +                       break;
> +
>                 uh2 = udp_hdr(seg);
>                 iph2 = ip_hdr(seg);
> 
> -               if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> +               if (uh->source != uh2->source || iph->saddr != iph2->saddr) {
>                         __udpv4_gso_segment_csum(seg,
>                                                  &iph2->saddr, &iph->saddr,
>                                                  &uh2->source, &uh->source);
> +                       seg_csum = true;
> +               }
> 
> -               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> +               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr) {
>                         __udpv4_gso_segment_csum(seg,
>                                                  &iph2->daddr, &iph->daddr,
>                                                  &uh2->dest, &uh->dest);
> -       }
> +                       seg_csum = true;
> +               }
> +
> +               seg = seg->next;
> +       } while (seg_csum);
> 
>         return segs;
>  }
Dongseok Yi Jan. 21, 2021, 12:47 p.m. UTC | #11
On 2021-01-21 21:28, Steffen Klassert wrote:
> On Wed, Jan 20, 2021 at 03:55:42PM +0900, Dongseok Yi wrote:
> > On 2021-01-18 22:27, Steffen Klassert wrote:
> > > On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > > skb_gso_segment is updated but following frag_skbs are not updated.
> > > >
> > > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > > does not try to update UDP/IP header of the segment list but copy
> > > > only the MAC header.
> > > >
> > > > Update dport, daddr and checksums of each skb of the segment list
> > > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > > >
> > > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > > Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
> > > > ---
> > > > v1:
> > > > Steffen Klassert said, there could be 2 options.
> > > > https://lore.kernel.org/patchwork/patch/1362257/
> > > > I was trying to write a quick fix, but it was not easy to forward
> > > > segmented list. Currently, assuming DNAT only.
> > > >
> > > > v2:
> > > > Per Steffen Klassert request, move the procedure from
> > > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > > >
> > > > To Alexander Lobakin, I've checked your email late. Just use this
> > > > patch as a reference. It support SNAT too, but does not support IPv6
> > > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > > to the file is in IPv4 directory.
> > > >
> > > >  include/net/udp.h      |  2 +-
> > > >  net/ipv4/udp_offload.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
> > > >  net/ipv6/udp_offload.c |  2 +-
> > > >  3 files changed, 59 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > > index 877832b..01351ba 100644
> > > > --- a/include/net/udp.h
> > > > +++ b/include/net/udp.h
> > > > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> > > >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> > > >
> > > >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > > > -				  netdev_features_t features);
> > > > +				  netdev_features_t features, bool is_ipv6);
> > > >
> > > >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> > > >  {
> > > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > > > index ff39e94..c532d3b 100644
> > > > --- a/net/ipv4/udp_offload.c
> > > > +++ b/net/ipv4/udp_offload.c
> > > > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> > > >  }
> > > >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > > >
> > > > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > > > +				     __be32 *oldip, __be32 *newip,
> > > > +				     __be16 *oldport, __be16 *newport)
> > > > +{
> > > > +	struct udphdr *uh = udp_hdr(seg);
> > > > +	struct iphdr *iph = ip_hdr(seg);
> > > > +
> > > > +	if (uh->check) {
> > > > +		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > > > +					 true);
> > > > +		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > > > +					 false);
> > > > +		if (!uh->check)
> > > > +			uh->check = CSUM_MANGLED_0;
> > > > +	}
> > > > +	uh->dest = *newport;
> > > > +
> > > > +	csum_replace4(&iph->check, *oldip, *newip);
> > > > +	iph->daddr = *newip;
> > > > +}
> > >
> > > Can't we just do the checksum recalculation for this case as it is done
> > > with standard GRO?
> >
> > If I understand standard GRO correctly, it calculates full checksum.
> > Should we adopt the same method to UDP GRO fraglist? I did not find
> > a simple method for the incremental checksum update.
> >
> > >
> > > > +
> > > > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > > > +{
> > > > +	struct sk_buff *seg;
> > > > +	struct udphdr *uh, *uh2;
> > > > +	struct iphdr *iph, *iph2;
> > > > +
> > > > +	seg = segs;
> > > > +	uh = udp_hdr(seg);
> > > > +	iph = ip_hdr(seg);
> > > > +
> > > > +	while ((seg = seg->next)) {
> > > > +		uh2 = udp_hdr(seg);
> > > > +		iph2 = ip_hdr(seg);
> > > > +
> > > > +		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > > > +			__udpv4_gso_segment_csum(seg,
> > > > +						 &iph2->saddr, &iph->saddr,
> > > > +						 &uh2->source, &uh->source);
> > > > +
> > > > +		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> > > > +			__udpv4_gso_segment_csum(seg,
> > > > +						 &iph2->daddr, &iph->daddr,
> > > > +						 &uh2->dest, &uh->dest);
> > > > +	}
> 
> 
> > >
> > > You don't need to check the addresses and ports of all packets in the
> > > segment list. If the addresses and ports are equal for the first and
> > > second packet in the list, then this also holds for the rest of the
> > > packets.
> >
> > I think we can try this with an additional flag (seg_csum).
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 36b7e30..3f892df 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -213,25 +213,36 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> >         struct sk_buff *seg;
> >         struct udphdr *uh, *uh2;
> >         struct iphdr *iph, *iph2;
> > +       bool seg_csum = false;
> >
> >         seg = segs;
> >         uh = udp_hdr(seg);
> >         iph = ip_hdr(seg);
> 
> Why not
> 
>        if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
>            (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
>            (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
>            (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
> 	   	return segs;
> 
> Then you don't need to test inside the loop. Just update all
> packets if there is a header mismatch.

The test inside the loop would be needed to distinguish SNAT
and DNAT. I will try your approach on v3. Thanks.

> 
> >
> > -       while ((seg = seg->next)) {
> > +       seg = seg->next;
> > +       do {
> > +               if (!seg)
> > +                       break;
> > +
> >                 uh2 = udp_hdr(seg);
> >                 iph2 = ip_hdr(seg);
> >
> > -               if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > +               if (uh->source != uh2->source || iph->saddr != iph2->saddr) {
> >                         __udpv4_gso_segment_csum(seg,
> >                                                  &iph2->saddr, &iph->saddr,
> >                                                  &uh2->source, &uh->source);
> > +                       seg_csum = true;
> > +               }
> >
> > -               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
> > +               if (uh->dest != uh2->dest || iph->daddr != iph2->daddr) {
> >                         __udpv4_gso_segment_csum(seg,
> >                                                  &iph2->daddr, &iph->daddr,
> >                                                  &uh2->dest, &uh->dest);
> > -       }
> > +                       seg_csum = true;
> > +               }
> > +
> > +               seg = seg->next;
> > +       } while (seg_csum);
> >
> >         return segs;
> >  }
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index 877832b..01351ba 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -178,7 +178,7 @@  struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
-				  netdev_features_t features);
+				  netdev_features_t features, bool is_ipv6);
 
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..c532d3b 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,8 +187,57 @@  struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static void __udpv4_gso_segment_csum(struct sk_buff *seg,
+				     __be32 *oldip, __be32 *newip,
+				     __be16 *oldport, __be16 *newport)
+{
+	struct udphdr *uh = udp_hdr(seg);
+	struct iphdr *iph = ip_hdr(seg);
+
+	if (uh->check) {
+		inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
+					 true);
+		inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
+					 false);
+		if (!uh->check)
+			uh->check = CSUM_MANGLED_0;
+	}
+	uh->dest = *newport;
+
+	csum_replace4(&iph->check, *oldip, *newip);
+	iph->daddr = *newip;
+}
+
+static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+	struct sk_buff *seg;
+	struct udphdr *uh, *uh2;
+	struct iphdr *iph, *iph2;
+
+	seg = segs;
+	uh = udp_hdr(seg);
+	iph = ip_hdr(seg);
+
+	while ((seg = seg->next)) {
+		uh2 = udp_hdr(seg);
+		iph2 = ip_hdr(seg);
+
+		if (uh->source != uh2->source || iph->saddr != iph2->saddr)
+			__udpv4_gso_segment_csum(seg,
+						 &iph2->saddr, &iph->saddr,
+						 &uh2->source, &uh->source);
+
+		if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
+			__udpv4_gso_segment_csum(seg,
+						 &iph2->daddr, &iph->daddr,
+						 &uh2->dest, &uh->dest);
+	}
+
+	return segs;
+}
+
 static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
-					      netdev_features_t features)
+					      netdev_features_t features, bool is_ipv6)
 {
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 
@@ -198,11 +247,14 @@  static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
 
 	udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
 
-	return skb;
+	if (is_ipv6)
+		return skb;
+	else
+		return __udpv4_gso_segment_list_csum(skb);
 }
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
-				  netdev_features_t features)
+				  netdev_features_t features, bool is_ipv6)
 {
 	struct sock *sk = gso_skb->sk;
 	unsigned int sum_truesize = 0;
@@ -214,7 +266,7 @@  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 	__be16 newlen;
 
 	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
-		return __udp_gso_segment_list(gso_skb, features);
+		return __udp_gso_segment_list(gso_skb, features, is_ipv6);
 
 	mss = skb_shinfo(gso_skb)->gso_size;
 	if (gso_skb->len <= sizeof(*uh) + mss)
@@ -328,7 +380,7 @@  static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		goto out;
 
 	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
-		return __udp_gso_segment(skb, features);
+		return __udp_gso_segment(skb, features, false);
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index c7bd7b1..faa823c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,7 +42,7 @@  static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 			goto out;
 
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
-			return __udp_gso_segment(skb, features);
+			return __udp_gso_segment(skb, features, true);
 
 		mss = skb_shinfo(skb)->gso_size;
 		if (unlikely(skb->len <= mss))