diff mbox series

[net-next,2/3] net: gro: parse ipv6 ext headers without frag0

Message ID 32febbc9-e603-4400-addd-bdb97ce56c1d@gmail.com (mailing list archive)
State New
Headers show
Series net: gro: reduce extension header parsing overhead | expand

Commit Message

Richard Gobert Dec. 21, 2023, 6:57 p.m. UTC
This commit utilizes a new helper function, ipv6_gro_pull_exthdrs, which
is used in ipv6_gro_receive to pull ipv6 ext headers instead of
ipv6_gso_pull_exthdrs. To use ipv6_gso_pull_exthdrs, pskb_pull and
__skb_push must be used, and frag0 must be invalidated. This commit
removes unnecessary code around the call to ipv6_gso_pull_exthdrs and
enables the frag0 fast path in IPv6 packets with ext headers.

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
 net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 10 deletions(-)

Comments

Willem de Bruijn Dec. 26, 2023, 9:59 p.m. UTC | #1
Richard Gobert wrote:
> This commit utilizes a new helper function, ipv6_gro_pull_exthdrs, which
> is used in ipv6_gro_receive to pull ipv6 ext headers instead of
> ipv6_gso_pull_exthdrs. To use ipv6_gso_pull_exthdr, pskb_pull and
> __skb_push must be used, and frag0 must be invalidated. This commit
> removes unnecessary code around the call to ipv6_gso_pull_exthdrs and
> enables the frag0 fast path in IPv6 packets with ext headers.
> 
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> ---
>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> index 0e0b5fed0995..a3b8d9127dbb 100644
> --- a/net/ipv6/ip6_offload.c
> +++ b/net/ipv6/ip6_offload.c
> @@ -37,6 +37,40 @@
>  		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
>  })
>  
> +static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
> +{
> +	const struct net_offload *ops = NULL;
> +	struct ipv6_opt_hdr *opth;
> +
> +	for (;;) {
> +		int len;
> +
> +		ops = rcu_dereference(inet6_offloads[proto]);
> +
> +		if (unlikely(!ops))
> +			break;
> +
> +		if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
> +			break;
> +
> +		opth = skb_gro_header(skb, off + 8, off);

When changing this code, it would be great to make it more self
documenting. It's not entirely clear what that 8 is based on.
sizeof(*opth) is only 2. Probably an optimization to handle the most
common extension headers in a single pskb_may_pull? If so, this new
code does not have that concern, so can just use sizeof(*opth). Or
else add a const int likely_max_opt_hdr_len = 8 or so.


> +		if (unlikely(!opth))
> +			break;
> +
> +		len = ipv6_optlen(opth);
> +
> +		opth = skb_gro_header(skb, off + len, off);
> +		if (unlikely(!opth))
> +			break;
> +		proto = opth->nexthdr;
> +
> +		off += len;
> +	}
> +
> +	skb_gro_pull(skb, off - skb_network_offset(skb));
> +	return proto;
> +}
> +
>  static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
>  {
>  	const struct net_offload *ops = NULL;
> @@ -203,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
>  		goto out;
>  
>  	skb_set_network_header(skb, off);
> -	skb_gro_pull(skb, sizeof(*iph));
> -	skb_set_transport_header(skb, skb_gro_offset(skb));
>  
> -	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
> +	flush += ntohs(iph->payload_len) != skb->len - hlen;
>  
>  	proto = iph->nexthdr;
>  	ops = rcu_dereference(inet6_offloads[proto]);
>  	if (!ops || !ops->callbacks.gro_receive) {
> -		pskb_pull(skb, skb_gro_offset(skb));
> -		skb_gro_frag0_invalidate(skb);
> -		proto = ipv6_gso_pull_exthdrs(skb, proto);
> -		skb_gro_pull(skb, -skb_transport_offset(skb));
> -		skb_reset_transport_header(skb);
> -		__skb_push(skb, skb_gro_offset(skb));
> +		proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
>  
>  		ops = rcu_dereference(inet6_offloads[proto]);
>  		if (!ops || !ops->callbacks.gro_receive)
>  			goto out;
>  
> -		iph = ipv6_hdr(skb);
> +		iph = skb_gro_network_header(skb);
> +	} else {
> +		skb_gro_pull(skb, sizeof(*iph));
>  	}

This code is non-obvious and has proven fragile (57ea52a8651). Changes
are best as simple as they can be, with ample documentation. My
attempt, as arrived at during review:

The existing code always pulls the IPv6 header and sets the transport
offset initially. Then optionally again pulls any extension headers
in ipv6_gso_pull_exthdrs and sets the transport offset again on
return from that call.

The new code adds a small optimization to only pull and set transport
offset once.

The existing code needs to set skb->data at the start of the first
extension header before calling ipv6_gso_pull_exthdrs, and must
disable the frag0 optimization because that function uses
pskb_may_pull/pskb_pull instead of skb_gro_ helpers. It sets the
GRO offset to the inner TCP header with skb_gro_pull and sets the
transport header. Then returns skb->data to its position before
this block.

The new code is much simpler: it does not have to modify skb->data,
as all operations are with skb_gro_ helpers.

Aside from the small comment above, and suggestion to include
something like this summary in the code and/or avoid the extra
optimization,

Reviewed-by: Willem de Bruijn <willemb@google.com>

>  
> +	skb_set_transport_header(skb, skb_gro_offset(skb));
> +
>  	NAPI_GRO_CB(skb)->proto = proto;
>  
>  	flush--;
> -- 
> 2.36.1
>
Richard Gobert Dec. 28, 2023, 4:46 p.m. UTC | #2
Willem de Bruijn wrote:
> Richard Gobert wrote:
>> This commit utilizes a new helper function, ipv6_gro_pull_exthdrs, which
>> is used in ipv6_gro_receive to pull ipv6 ext headers instead of
>> ipv6_gso_pull_exthdrs. To use ipv6_gso_pull_exthdr, pskb_pull and
>> __skb_push must be used, and frag0 must be invalidated. This commit
>> removes unnecessary code around the call to ipv6_gso_pull_exthdrs and
>> enables the frag0 fast path in IPv6 packets with ext headers.
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
>> ---
>>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
>>  1 file changed, 41 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
>> index 0e0b5fed0995..a3b8d9127dbb 100644
>> --- a/net/ipv6/ip6_offload.c
>> +++ b/net/ipv6/ip6_offload.c
>> @@ -37,6 +37,40 @@
>>  		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
>>  })
>>  
>> +static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
>> +{
>> +	const struct net_offload *ops = NULL;
>> +	struct ipv6_opt_hdr *opth;
>> +
>> +	for (;;) {
>> +		int len;
>> +
>> +		ops = rcu_dereference(inet6_offloads[proto]);
>> +
>> +		if (unlikely(!ops))
>> +			break;
>> +
>> +		if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
>> +			break;
>> +
>> +		opth = skb_gro_header(skb, off + 8, off);
> 
> When changing this code, it would be great to make it more self
> documenting. It's not entirely clear what that 8 is based on.
> sizeof(*opth) is only 2. Probably an optimization to handle the most
> common extension headers in a single pskb_may_pull? If so, this new
> code does not have that concern, so can just use sizeof(*opth). Or
> else add a const int likely_max_opt_hdr_len = 8 or so.
> 
> 
>> +		if (unlikely(!opth))
>> +			break;
>> +
>> +		len = ipv6_optlen(opth);
>> +
>> +		opth = skb_gro_header(skb, off + len, off);
>> +		if (unlikely(!opth))
>> +			break;
>> +		proto = opth->nexthdr;
>> +
>> +		off += len;
>> +	}
>> +
>> +	skb_gro_pull(skb, off - skb_network_offset(skb));
>> +	return proto;
>> +}
>> +
>>  static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
>>  {
>>  	const struct net_offload *ops = NULL;
>> @@ -203,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
>>  		goto out;
>>  
>>  	skb_set_network_header(skb, off);
>> -	skb_gro_pull(skb, sizeof(*iph));
>> -	skb_set_transport_header(skb, skb_gro_offset(skb));
>>  
>> -	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
>> +	flush += ntohs(iph->payload_len) != skb->len - hlen;
>>  
>>  	proto = iph->nexthdr;
>>  	ops = rcu_dereference(inet6_offloads[proto]);
>>  	if (!ops || !ops->callbacks.gro_receive) {
>> -		pskb_pull(skb, skb_gro_offset(skb));
>> -		skb_gro_frag0_invalidate(skb);
>> -		proto = ipv6_gso_pull_exthdrs(skb, proto);
>> -		skb_gro_pull(skb, -skb_transport_offset(skb));
>> -		skb_reset_transport_header(skb);
>> -		__skb_push(skb, skb_gro_offset(skb));
>> +		proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
>>  
>>  		ops = rcu_dereference(inet6_offloads[proto]);
>>  		if (!ops || !ops->callbacks.gro_receive)
>>  			goto out;
>>  
>> -		iph = ipv6_hdr(skb);
>> +		iph = skb_gro_network_header(skb);
>> +	} else {
>> +		skb_gro_pull(skb, sizeof(*iph));
>>  	}
> 
> This code is non-obvious and has proven fragile (57ea52a8651). Changes
> are best as simple as they can be, with ample documentation. My
> attempt, as arrived at during review:
> 
> The existing always pulls the IPv6 header and sets the transport
> offset initially. Then optionally again pulls any extension headers
> in ipv6_gso_pull_exthdrs and sets the transport offset again on
> return from that call.
> 
> The new code adds a small optimization to only pull and set transport
> offset once.
> 
> The existing code needs to set skb->data at the start of the first
> extension header before calling ipv6_gso_pull_exthdrs, and must
> disable the frag0 optimization because that function uses
> pskb_may_pull/pskb_pull instead of skb_gro_ helpers. It sets the
> GRO offset to the inner TCP header with skb_gro_pull and sets the
> transport header. Then returns skb->data to its position before
> this block.
> 
> The new code is much simpler: it does not have to modify skb->data,
> as all operations are with skb_gro_ helpers.
> 
> Aside from the small comment above, and suggestion to include
> something like this summary in the code and/or avoid the extra
> optimization,
> 
> Reviewed-by: Willem de Bruijn <willemb@google.com>
> 

Thanks for the review, I'll submit v2 with an extended commit message
summary as suggested.

>>  
>> +	skb_set_transport_header(skb, skb_gro_offset(skb));
>> +
>>  	NAPI_GRO_CB(skb)->proto = proto;
>>  
>>  	flush--;
>> -- 
>> 2.36.1
>>
> 
>
diff mbox series

Patch

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 0e0b5fed0995..a3b8d9127dbb 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -37,6 +37,40 @@ 
 		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
 })
 
+static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
+{
+	const struct net_offload *ops = NULL;
+	struct ipv6_opt_hdr *opth;
+
+	for (;;) {
+		int len;
+
+		ops = rcu_dereference(inet6_offloads[proto]);
+
+		if (unlikely(!ops))
+			break;
+
+		if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+			break;
+
+		opth = skb_gro_header(skb, off + 8, off);
+		if (unlikely(!opth))
+			break;
+
+		len = ipv6_optlen(opth);
+
+		opth = skb_gro_header(skb, off + len, off);
+		if (unlikely(!opth))
+			break;
+		proto = opth->nexthdr;
+
+		off += len;
+	}
+
+	skb_gro_pull(skb, off - skb_network_offset(skb));
+	return proto;
+}
+
 static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 {
 	const struct net_offload *ops = NULL;
@@ -203,28 +237,25 @@  INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		goto out;
 
 	skb_set_network_header(skb, off);
-	skb_gro_pull(skb, sizeof(*iph));
-	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+	flush += ntohs(iph->payload_len) != skb->len - hlen;
 
 	proto = iph->nexthdr;
 	ops = rcu_dereference(inet6_offloads[proto]);
 	if (!ops || !ops->callbacks.gro_receive) {
-		pskb_pull(skb, skb_gro_offset(skb));
-		skb_gro_frag0_invalidate(skb);
-		proto = ipv6_gso_pull_exthdrs(skb, proto);
-		skb_gro_pull(skb, -skb_transport_offset(skb));
-		skb_reset_transport_header(skb);
-		__skb_push(skb, skb_gro_offset(skb));
+		proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
 
 		ops = rcu_dereference(inet6_offloads[proto]);
 		if (!ops || !ops->callbacks.gro_receive)
 			goto out;
 
-		iph = ipv6_hdr(skb);
+		iph = skb_gro_network_header(skb);
+	} else {
+		skb_gro_pull(skb, sizeof(*iph));
 	}
 
+	skb_set_transport_header(skb, skb_gro_offset(skb));
+
 	NAPI_GRO_CB(skb)->proto = proto;
 
 	flush--;