diff mbox series

[net,2/2] net: ioam6: mitigate the two reallocations problem

Message ID 20240702174451.22735-3-justin.iurman@uliege.be (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: ioam6: fix bugs in ioam6_iptunnel | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 856 this patch: 856
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 860 this patch: 860
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 860 this patch: 860
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 102 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-03--12-00 (tests: 666)

Commit Message

Justin Iurman July 2, 2024, 5:44 p.m. UTC
Get the cache _before_ adding bytes. This way, we provide the dst entry
to skb_cow_head(), so that we call LL_RESERVED_SPACE() on it and avoid
two reallocations in some specific cases. We cannot do much when the dst
entry is empty (cache is empty, this is the first time): in that case,
we use skb->mac_len by default and two reallocations will happen in
those specific cases. However, it will only happen once, not every
single time.

Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
---
 net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

Comments

Justin Iurman July 2, 2024, 6:30 p.m. UTC | #1
On 7/2/24 19:44, Justin Iurman wrote:
> Get the cache _before_ adding bytes. This way, we provide the dst entry
> to skb_cow_head(), so that we call LL_RESERVED_SPACE() on it and avoid
> two reallocations in some specific cases. We cannot do much when the dst
> entry is empty (cache is empty, this is the first time): in that case,
> we use skb->mac_len by default and two reallocations will happen in
> those specific cases. However, it will only happen once, not every
> single time.

This fix could also be applied to seg6 and rpl. I'm not sure the problem
would show up there (I did some quick computations and it seems unlikely),
but it would probably still be worth having the fix there too, just in
case. Any opinions?

> Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
> Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
> ---
>   net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++----------------
>   1 file changed, 20 insertions(+), 16 deletions(-)
> 
> diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
> index b08c13550144..e5a7e7472b71 100644
> --- a/net/ipv6/ioam6_iptunnel.c
> +++ b/net/ipv6/ioam6_iptunnel.c
> @@ -220,14 +220,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
>   }
>   
>   static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
> -			   struct ioam6_lwt_encap *tuninfo)
> +			   struct ioam6_lwt_encap *tuninfo,
> +			   struct dst_entry *dst)
>   {
>   	struct ipv6hdr *oldhdr, *hdr;
>   	int hdrlen, err;
>   
>   	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>   
> -	err = skb_cow_head(skb, hdrlen + skb->mac_len);
> +	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
> +					       : LL_RESERVED_SPACE(dst->dev)));
>   	if (unlikely(err))
>   		return err;
>   
> @@ -256,16 +258,17 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
>   
>   static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
>   			  struct ioam6_lwt_encap *tuninfo,
> -			  struct in6_addr *tundst)
> +			  struct in6_addr *tundst,
> +			  struct dst_entry *dst)
>   {
> -	struct dst_entry *dst = skb_dst(skb);
>   	struct ipv6hdr *hdr, *inner_hdr;
>   	int hdrlen, len, err;
>   
>   	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
>   	len = sizeof(*hdr) + hdrlen;
>   
> -	err = skb_cow_head(skb, len + skb->mac_len);
> +	err = skb_cow_head(skb, len + (!dst ? skb->mac_len
> +					    : LL_RESERVED_SPACE(dst->dev)));
>   	if (unlikely(err))
>   		return err;
>   
> @@ -285,7 +288,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
>   	hdr->nexthdr = NEXTHDR_HOP;
>   	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
>   	hdr->daddr = *tundst;
> -	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
> +	ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
>   			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
>   
>   	skb_postpush_rcsum(skb, hdr, len);
> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   
>   	orig_daddr = ipv6_hdr(skb)->daddr;
>   
> +	local_bh_disable();
> +	dst = dst_cache_get(&ilwt->cache);
> +	local_bh_enable();
> +
>   	switch (ilwt->mode) {
>   	case IOAM6_IPTUNNEL_MODE_INLINE:
>   do_inline:
> @@ -320,7 +327,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
>   			goto out;
>   
> -		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
> +		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
>   		if (unlikely(err))
>   			goto drop;
>   
> @@ -328,7 +335,8 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   	case IOAM6_IPTUNNEL_MODE_ENCAP:
>   do_encap:
>   		/* Encapsulation (ip6ip6) */
> -		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
> +		err = ioam6_do_encap(net, skb,
> +				     &ilwt->tuninfo, &ilwt->tundst, dst);
>   		if (unlikely(err))
>   			goto drop;
>   
> @@ -346,10 +354,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		goto drop;
>   	}
>   
> -	local_bh_disable();
> -	dst = dst_cache_get(&ilwt->cache);
> -	local_bh_enable();
> -
>   	if (unlikely(!dst)) {
>   		struct ipv6hdr *hdr = ipv6_hdr(skb);
>   		struct flowi6 fl6;
> @@ -371,15 +375,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>   		local_bh_disable();
>   		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
>   		local_bh_enable();
> +
> +		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> +		if (unlikely(err))
> +			goto drop;
>   	}
>   
>   	skb_dst_drop(skb);
>   	skb_dst_set(skb, dst);
>   
> -	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> -	if (unlikely(err))
> -		goto drop;
> -
>   	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
>   		return dst_output(net, sk, skb);
>   out:
Paolo Abeni July 4, 2024, 9:23 a.m. UTC | #2
On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote:
> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>  
>  	orig_daddr = ipv6_hdr(skb)->daddr;
>  
> +	local_bh_disable();
> +	dst = dst_cache_get(&ilwt->cache);
> +	local_bh_enable();
> +
>  	switch (ilwt->mode) {
>  	case IOAM6_IPTUNNEL_MODE_INLINE:

I now see that the way you coded patch 1/2 makes this one easier.

Still I think it's quite doubtful to make the dst cache access
unconditional.

Given the above I suggest to replace the 2 patches with a single one
moving the whole dst_cache logic before the switch statement.

Also this does not address a functional issue, IMHO it's more a
performance improvement, could as well target net-next with no fixes
tag.

WRT seg6 and rpl tunnels, before any patch, I think we first need
confirmation the problem is present there, too.

Thanks,

Paolo
Justin Iurman July 4, 2024, 11:58 a.m. UTC | #3
On 7/4/24 11:23, Paolo Abeni wrote:
> On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote:
>> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>>   
>>   	orig_daddr = ipv6_hdr(skb)->daddr;
>>   
>> +	local_bh_disable();
>> +	dst = dst_cache_get(&ilwt->cache);
>> +	local_bh_enable();
>> +
>>   	switch (ilwt->mode) {
>>   	case IOAM6_IPTUNNEL_MODE_INLINE:
> 
> I now see that the way you coded patch 1/2 makes this one easier.

Hi Paolo,

Indeed. I originally had it as a single two-in-one patch, then I thought 
it would be clearer to split it up (looks like I was wrong, sorry).

> Still I think it's quite doubtful to make the dst cache access
> unconditional.

By unconditional, you mean getting the cache _before_ the switch, right?
If so, that's indeed the only way to provide it to the encap/inline
functions for the mitigation. However, I don't see it as a problem.
Instead of (a) calling the encap/inline function, then (b) getting the
cache, you'd (a) get the cache, then (b) call the encap/inline function.
IMHO, it's the same. I'll re-run our measurements and compare them to our
previous results in order to confirm that getting the cache early does not
impact performance. The only exception would be when skb_cow_head() returns
an error in the encap/inline functions: in that case, getting the cache
early would be a waste of time, but this situation suggests there is
already a problem, so it's probably fine.

> Given the above I suggest to replace the 2 patches with a single one
> moving the whole dst_cache logic before the switch statement.

Will do!

> Also this does not address a functional issue, IMHO it's more a
> performance improvement, could as well target net-next with no fixes
> tag.

Hmmm, it's indeed OK to target net-next for patch #2 since it could be
considered an improvement (not really a functional issue per se).
However, I'm not sure about patch #1. Wouldn't the kernel crash if not
enough headroom was allocated (assuming the driver does not check before
writing)?

> WRT seg6 and rpl tunnels, before any patch, I think we first need
> confirmation the problem is present there, too.

Ack. I'll try to run some tests to check that.

Thanks,
Justin

> Thanks,
> 
> Paolo
>
diff mbox series

Patch

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index b08c13550144..e5a7e7472b71 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -220,14 +220,16 @@  static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
 }
 
 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
-			   struct ioam6_lwt_encap *tuninfo)
+			   struct ioam6_lwt_encap *tuninfo,
+			   struct dst_entry *dst)
 {
 	struct ipv6hdr *oldhdr, *hdr;
 	int hdrlen, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 
-	err = skb_cow_head(skb, hdrlen + skb->mac_len);
+	err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len
+					       : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -256,16 +258,17 @@  static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
 
 static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 			  struct ioam6_lwt_encap *tuninfo,
-			  struct in6_addr *tundst)
+			  struct in6_addr *tundst,
+			  struct dst_entry *dst)
 {
-	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr, *inner_hdr;
 	int hdrlen, len, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 	len = sizeof(*hdr) + hdrlen;
 
-	err = skb_cow_head(skb, len + skb->mac_len);
+	err = skb_cow_head(skb, len + (!dst ? skb->mac_len
+					    : LL_RESERVED_SPACE(dst->dev)));
 	if (unlikely(err))
 		return err;
 
@@ -285,7 +288,7 @@  static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 	hdr->nexthdr = NEXTHDR_HOP;
 	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
 	hdr->daddr = *tundst;
-	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+	ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
 			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
 
 	skb_postpush_rcsum(skb, hdr, len);
@@ -313,6 +316,10 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	orig_daddr = ipv6_hdr(skb)->daddr;
 
+	local_bh_disable();
+	dst = dst_cache_get(&ilwt->cache);
+	local_bh_enable();
+
 	switch (ilwt->mode) {
 	case IOAM6_IPTUNNEL_MODE_INLINE:
 do_inline:
@@ -320,7 +327,7 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
 			goto out;
 
-		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -328,7 +335,8 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	case IOAM6_IPTUNNEL_MODE_ENCAP:
 do_encap:
 		/* Encapsulation (ip6ip6) */
-		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+		err = ioam6_do_encap(net, skb,
+				     &ilwt->tuninfo, &ilwt->tundst, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -346,10 +354,6 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	local_bh_disable();
-	dst = dst_cache_get(&ilwt->cache);
-	local_bh_enable();
-
 	if (unlikely(!dst)) {
 		struct ipv6hdr *hdr = ipv6_hdr(skb);
 		struct flowi6 fl6;
@@ -371,15 +375,15 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		local_bh_disable();
 		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
 		local_bh_enable();
+
+		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+		if (unlikely(err))
+			goto drop;
 	}
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 
-	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
-	if (unlikely(err))
-		goto drop;
-
 	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
 		return dst_output(net, sk, skb);
 out: