Message ID | 20240702174451.22735-3-justin.iurman@uliege.be (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: ioam6: fix bugs in ioam6_iptunnel | expand |
On 7/2/24 19:44, Justin Iurman wrote: > Get the cache _before_ adding bytes. This way, we provide the dst entry > to skb_cow_head(), so that we call LL_RESERVED_SPACE() on it and avoid > two reallocations in some specific cases. We cannot do much when the dst > entry is empty (cache is empty, this is the first time): in that case, > we use skb->mac_len by default and two reallocations will happen in > those specific cases. However, it will only happen once, not every > single time. This fix could also be applied to seg6 and rpl. Not sure if the problem would show up though (I did some quick computations, seems unlikely), but still... would probably be interesting to have it there too, just in case. Any opinion? > Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation") > Signed-off-by: Justin Iurman <justin.iurman@uliege.be> > --- > net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++---------------- > 1 file changed, 20 insertions(+), 16 deletions(-) > > diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c > index b08c13550144..e5a7e7472b71 100644 > --- a/net/ipv6/ioam6_iptunnel.c > +++ b/net/ipv6/ioam6_iptunnel.c > @@ -220,14 +220,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb) > } > > static int ioam6_do_inline(struct net *net, struct sk_buff *skb, > - struct ioam6_lwt_encap *tuninfo) > + struct ioam6_lwt_encap *tuninfo, > + struct dst_entry *dst) > { > struct ipv6hdr *oldhdr, *hdr; > int hdrlen, err; > > hdrlen = (tuninfo->eh.hdrlen + 1) << 3; > > - err = skb_cow_head(skb, hdrlen + skb->mac_len); > + err = skb_cow_head(skb, hdrlen + (!dst ? 
skb->mac_len > + : LL_RESERVED_SPACE(dst->dev))); > if (unlikely(err)) > return err; > > @@ -256,16 +258,17 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb, > > static int ioam6_do_encap(struct net *net, struct sk_buff *skb, > struct ioam6_lwt_encap *tuninfo, > - struct in6_addr *tundst) > + struct in6_addr *tundst, > + struct dst_entry *dst) > { > - struct dst_entry *dst = skb_dst(skb); > struct ipv6hdr *hdr, *inner_hdr; > int hdrlen, len, err; > > hdrlen = (tuninfo->eh.hdrlen + 1) << 3; > len = sizeof(*hdr) + hdrlen; > > - err = skb_cow_head(skb, len + skb->mac_len); > + err = skb_cow_head(skb, len + (!dst ? skb->mac_len > + : LL_RESERVED_SPACE(dst->dev))); > if (unlikely(err)) > return err; > > @@ -285,7 +288,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, > hdr->nexthdr = NEXTHDR_HOP; > hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); > hdr->daddr = *tundst; > - ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, > + ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr, > IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); > > skb_postpush_rcsum(skb, hdr, len); > @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > > orig_daddr = ipv6_hdr(skb)->daddr; > > + local_bh_disable(); > + dst = dst_cache_get(&ilwt->cache); > + local_bh_enable(); > + > switch (ilwt->mode) { > case IOAM6_IPTUNNEL_MODE_INLINE: > do_inline: > @@ -320,7 +327,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) > goto out; > > - err = ioam6_do_inline(net, skb, &ilwt->tuninfo); > + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst); > if (unlikely(err)) > goto drop; > > @@ -328,7 +335,8 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > case IOAM6_IPTUNNEL_MODE_ENCAP: > do_encap: > /* Encapsulation (ip6ip6) */ > - err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst); > + err = ioam6_do_encap(net, 
skb, > + &ilwt->tuninfo, &ilwt->tundst, dst); > if (unlikely(err)) > goto drop; > > @@ -346,10 +354,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > goto drop; > } > > - local_bh_disable(); > - dst = dst_cache_get(&ilwt->cache); > - local_bh_enable(); > - > if (unlikely(!dst)) { > struct ipv6hdr *hdr = ipv6_hdr(skb); > struct flowi6 fl6; > @@ -371,15 +375,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > local_bh_disable(); > dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); > local_bh_enable(); > + > + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); > + if (unlikely(err)) > + goto drop; > } > > skb_dst_drop(skb); > skb_dst_set(skb, dst); > > - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); > - if (unlikely(err)) > - goto drop; > - > if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) > return dst_output(net, sk, skb); > out:
On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote: > @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) > > orig_daddr = ipv6_hdr(skb)->daddr; > > + local_bh_disable(); > + dst = dst_cache_get(&ilwt->cache); > + local_bh_enable(); > + > switch (ilwt->mode) { > case IOAM6_IPTUNNEL_MODE_INLINE: I now see that the way you coded patch 1/2 makes this one easier. Still, I think it's quite questionable to make the dst cache access unconditional. Given the above, I suggest replacing the 2 patches with a single one that moves the whole dst_cache logic before the switch statement. Also, this does not address a functional issue; IMHO it's more of a performance improvement, so it could as well target net-next with no Fixes tag. WRT seg6 and rpl tunnels, before any patch, I think we first need confirmation that the problem is present there, too. Thanks, Paolo
On 7/4/24 11:23, Paolo Abeni wrote: > On Tue, 2024-07-02 at 19:44 +0200, Justin Iurman wrote: >> @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) >> >> orig_daddr = ipv6_hdr(skb)->daddr; >> >> + local_bh_disable(); >> + dst = dst_cache_get(&ilwt->cache); >> + local_bh_enable(); >> + >> switch (ilwt->mode) { >> case IOAM6_IPTUNNEL_MODE_INLINE: > > I now see that the way you coded patch 1/2 makes this one easier. Hi Paolo, Indeed. I originally had it as a single two-in-one patch, then I thought it would be clearer to split it up (looks like I was wrong, sorry). > Still I think it's quite doubtful to make the dst cache access > unconditional. By unconditional, you mean to get the cache _before_ the switch, right? If so, that's indeed the only solution to provide it to the encap/inline function for the mitigation. However, I don't see it as a problem. Instead of having (a) call encap/fill function, then (b) get cache; you'd have (a) get cache, then (b) call encap/fill function. IMHO, it's the same. I'll re-run our measurements and compare them to our previous results in order to confirm getting the cache early does not impact performance. The only exception would be when skb_cow_head returns an error in encap/fill functions: in that case, getting the cache early would be a waste of time, but this situation suggests there is a problem already so it's probably fine. > Given the above I suggest to replace the 2 patches with a single one > moving the whole dst_cache logic before the switch statement. Will do! > Also this does not address a functional issue, IMHO it's more a > performance improvement, could as well target net-next with no fixes > tag. Hmmm, it's indeed OK to target net-next for patch #2 since it could be considered as an improvement (not really a functional issue per se). However, I'm not sure for patch #1. 
Wouldn't the kernel crash if not enough headroom was allocated (assuming no check is done before writing in the driver)? > WRT seg6 and rpl tunnels, before any patch, I think we first need > confirmation the problem is present there, too. Ack. I'll try to run some tests to check that. Thanks, Justin > Thanks, > > Paolo >
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index b08c13550144..e5a7e7472b71 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -220,14 +220,16 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb) } static int ioam6_do_inline(struct net *net, struct sk_buff *skb, - struct ioam6_lwt_encap *tuninfo) + struct ioam6_lwt_encap *tuninfo, + struct dst_entry *dst) { struct ipv6hdr *oldhdr, *hdr; int hdrlen, err; hdrlen = (tuninfo->eh.hdrlen + 1) << 3; - err = skb_cow_head(skb, hdrlen + skb->mac_len); + err = skb_cow_head(skb, hdrlen + (!dst ? skb->mac_len + : LL_RESERVED_SPACE(dst->dev))); if (unlikely(err)) return err; @@ -256,16 +258,17 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb, static int ioam6_do_encap(struct net *net, struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo, - struct in6_addr *tundst) + struct in6_addr *tundst, + struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr, *inner_hdr; int hdrlen, len, err; hdrlen = (tuninfo->eh.hdrlen + 1) << 3; len = sizeof(*hdr) + hdrlen; - err = skb_cow_head(skb, len + skb->mac_len); + err = skb_cow_head(skb, len + (!dst ? 
skb->mac_len + : LL_RESERVED_SPACE(dst->dev))); if (unlikely(err)) return err; @@ -285,7 +288,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, hdr->nexthdr = NEXTHDR_HOP; hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); hdr->daddr = *tundst; - ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, + ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr, IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); skb_postpush_rcsum(skb, hdr, len); @@ -313,6 +316,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) orig_daddr = ipv6_hdr(skb)->daddr; + local_bh_disable(); + dst = dst_cache_get(&ilwt->cache); + local_bh_enable(); + switch (ilwt->mode) { case IOAM6_IPTUNNEL_MODE_INLINE: do_inline: @@ -320,7 +327,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) goto out; - err = ioam6_do_inline(net, skb, &ilwt->tuninfo); + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst); if (unlikely(err)) goto drop; @@ -328,7 +335,8 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) case IOAM6_IPTUNNEL_MODE_ENCAP: do_encap: /* Encapsulation (ip6ip6) */ - err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst); + err = ioam6_do_encap(net, skb, + &ilwt->tuninfo, &ilwt->tundst, dst); if (unlikely(err)) goto drop; @@ -346,10 +354,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - local_bh_disable(); - dst = dst_cache_get(&ilwt->cache); - local_bh_enable(); - if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; @@ -371,15 +375,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) local_bh_disable(); dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); local_bh_enable(); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; } skb_dst_drop(skb); skb_dst_set(skb, dst); - err = skb_cow_head(skb, 
LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; - if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) return dst_output(net, sk, skb); out:
Get the dst entry from the cache _before_ adding bytes to the skb. This way, we can pass the dst entry to the inline/encap helpers, so that their skb_cow_head() call also accounts for LL_RESERVED_SPACE() of the output device, which avoids two reallocations in some specific cases. We cannot do much when the dst entry is not available (the cache is empty, i.e., this is the first time): in that case, we use skb->mac_len by default and two reallocations will still happen in those specific cases. However, this will only happen once, not every single time. Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation") Signed-off-by: Justin Iurman <justin.iurman@uliege.be> --- net/ipv6/ioam6_iptunnel.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-)