diff mbox series

[RFC,net-next,2/3] icmp: ICMPV6: Examine invoking packet for Segment Route Headers.

Message ID 20211201163245.3629254-3-andrew@lunn.ch (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Fix traceroute in the presence of SRv6 | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2234 this patch: 2234
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 357 this patch: 357
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2347 this patch: 2347
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 77 lines checked
netdev/kdoc success Errors and warnings before: 37 this patch: 37
netdev/source_inline success Was 0 now: 0

Commit Message

Andrew Lunn Dec. 1, 2021, 4:32 p.m. UTC
RFC8754 says:

ICMP error packets generated within the SR domain are sent to source
nodes within the SR domain.  The invoking packet in the ICMP error
message may contain an SRH.  Since the destination address of a packet
with an SRH changes as each segment is processed, it may not be the
destination used by the socket or application that generated the
invoking packet.

For the source of an invoking packet to process the ICMP error
message, the ultimate destination address of the IPv6 header may be
required.  The following logic is used to determine the destination
address for use by protocol-error handlers.

*  Walk all extension headers of the invoking IPv6 packet to the
   routing extension header preceding the upper-layer header.

   -  If routing header is type 4 Segment Routing Header (SRH)

      o  The SID at Segment List[0] may be used as the destination
         address of the invoking packet.

Clone the skb and modify the header offset to give a new skb which
contains the invoking packet. The seg6 helpers can then be used on the
skb to find any segment routing headers. If found, mark this fact in
the IPv6 control block of the skb, and store the offset into the
packet of the SRH.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
---
 include/linux/ipv6.h |  2 ++
 net/ipv6/icmp.c      | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

Comments

Willem de Bruijn Dec. 1, 2021, 5:33 p.m. UTC | #1
>  include/linux/ipv6.h |  2 ++
>  net/ipv6/icmp.c      | 36 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 20c1f968da7c..d8ab5022d397 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -133,6 +133,7 @@ struct inet6_skb_parm {
>         __u16                   dsthao;
>  #endif
>         __u16                   frag_max_size;
> +       __u16                   srhoff;

Out of scope for this patch, but I guess we could use a

BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof_field(struct sk_buff, cb));

>
>  #define IP6SKB_XFRM_TRANSFORMED        1
>  #define IP6SKB_FORWARDED       2
> @@ -142,6 +143,7 @@ struct inet6_skb_parm {
>  #define IP6SKB_HOPBYHOP        32
>  #define IP6SKB_L3SLAVE         64
>  #define IP6SKB_JUMBOGRAM      128
> +#define IP6SKB_SEG6          512

256?

>  };
>
>  #if defined(CONFIG_NET_L3_MASTER_DEV)
> diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
> index a7c31ab67c5d..315787b79f29 100644
> --- a/net/ipv6/icmp.c
> +++ b/net/ipv6/icmp.c
> @@ -57,6 +57,7 @@
>  #include <net/protocol.h>
>  #include <net/raw.h>
>  #include <net/rawv6.h>
> +#include <net/seg6.h>
>  #include <net/transp_v6.h>
>  #include <net/ip6_route.h>
>  #include <net/addrconf.h>
> @@ -818,9 +819,40 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
>         local_bh_enable();
>  }
>
> +/* Determine if the invoking packet contains a segment routing header.
> + * If it does, extract the true destination address, which is the
> + * first segment address
> + */
> +static void icmpv6_notify_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
> +{
> +       struct sk_buff *skb_orig;
> +       struct ipv6_sr_hdr *srh;
> +
> +       skb_orig = skb_clone(skb, GFP_ATOMIC);
> +       if (!skb_orig)
> +               return;

Is this to be allowed to write to skb->cb? Or because seg6_get_srh
calls pskb_may_pull to parse the headers?

It is unlikely (not impossible) in this path for the packet to be
shared or cloned. Avoid this operation when it isn't? Most packets
will not actually have segment routing, so this imposes significant
cost on the common case (if in the not common ICMP processing path).

nit: I found the name skb_orig confusing, as it is not in the meaning
of preserve the original skb as at function entry.

> +       skb_dst_drop(skb_orig);
> +       skb_reset_network_header(skb_orig);
> +
> +       srh = seg6_get_srh(skb_orig, 0);
> +       if (!srh)
> +               goto out;
> +
> +       if (srh->type != IPV6_SRCRT_TYPE_4)
> +               goto out;
> +
> +       opt->flags |= IP6SKB_SEG6;
> +       opt->srhoff = (unsigned char *)srh - skb->data;

Should this offset be against skb->head, in case other data move
operations could occur?

Also, what happens if the header was in a frags that was pulled by
pskb_may_pull in seg6_get_srh.

If we can expect headers to exist in the linear segment, then perhaps
the whole code can be simplified and the clone can be avoided.

> +
> +out:
> +       kfree_skb(skb_orig);
> +}
> +
>  void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
>  {
>         const struct inet6_protocol *ipprot;
> +       struct inet6_skb_parm *opt = IP6CB(skb);
>         int inner_offset;
>         __be16 frag_off;
>         u8 nexthdr;
> @@ -829,6 +861,8 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
>         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
>                 goto out;
>
> +       icmpv6_notify_srh(skb, opt);
> +
>         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
>         if (ipv6_ext_hdr(nexthdr)) {
>                 /* now skip over extension headers */
> @@ -853,7 +887,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
>
>         ipprot = rcu_dereference(inet6_protos[nexthdr]);
>         if (ipprot && ipprot->err_handler)
> -               ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
> +               ipprot->err_handler(skb, opt, type, code, inner_offset, info);
>
>         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
>         return;
> --
> 2.33.1
>
Andrew Lunn Dec. 1, 2021, 6:10 p.m. UTC | #2
On Wed, Dec 01, 2021 at 09:33:32AM -0800, Willem de Bruijn wrote:
> >  include/linux/ipv6.h |  2 ++
> >  net/ipv6/icmp.c      | 36 +++++++++++++++++++++++++++++++++++-
> >  2 files changed, 37 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> > index 20c1f968da7c..d8ab5022d397 100644
> > --- a/include/linux/ipv6.h
> > +++ b/include/linux/ipv6.h
> > @@ -133,6 +133,7 @@ struct inet6_skb_parm {
> >         __u16                   dsthao;
> >  #endif
> >         __u16                   frag_max_size;
> > +       __u16                   srhoff;
> 
> Out of scope for this patch, but I guess we could use a
> 
> BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof_field(struct sk_buff, cb));
 
There is something like that already. I triggered a BUILD_BUG_ON
failure when i put the actual IPv6 destination address here, rather
than an offset to it.

> >  #define IP6SKB_XFRM_TRANSFORMED        1
> >  #define IP6SKB_FORWARDED       2
> > @@ -142,6 +143,7 @@ struct inet6_skb_parm {
> >  #define IP6SKB_HOPBYHOP        32
> >  #define IP6SKB_L3SLAVE         64
> >  #define IP6SKB_JUMBOGRAM      128
> > +#define IP6SKB_SEG6          512
> 
> 256?

Doh!

> > +static void icmpv6_notify_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
> > +{
> > +       struct sk_buff *skb_orig;
> > +       struct ipv6_sr_hdr *srh;
> > +
> > +       skb_orig = skb_clone(skb, GFP_ATOMIC);
> > +       if (!skb_orig)
> > +               return;
> 
> Is this to be allowed to write to skb->cb? Or because seg6_get_srh
> calls pskb_may_pull to parse the headers?

This is an ICMP error message. So we have an IP packet, skb, which
contains in the message body the IP packet which invoked the error. If
we pass skb to seg6_get_srh() it will look in the received ICMP
packet. But we actually want to find the SRH in the packet which
invoked the error, the one which is in the message body. So the code
makes a clone of the skb, and then updates the pointers so that it
points to the invoking packet within the ICMP packet. Then we can use
seg6_get_srh() on this inner packet, since it just looks like an
ordinary IP packet.

> It is unlikely (not impossible) in this path for the packet to be
> shared or cloned. Avoid this operation when it isn't? Most packets
> will not actually have segment routing, so this imposes significant
> cost on the common case (if in the not common ICMP processing path).
> 
> nit: I found the name skb_orig confusing, as it is not in the meaning
> of preserve the original skb as at function entry.

skb_invoking? That seems to be the ICMP terminology?

> > +       skb_dst_drop(skb_orig);
> > +       skb_reset_network_header(skb_orig);
> > +
> > +       srh = seg6_get_srh(skb_orig, 0);
> > +       if (!srh)
> > +               goto out;
> > +
> > +       if (srh->type != IPV6_SRCRT_TYPE_4)
> > +               goto out;
> > +
> > +       opt->flags |= IP6SKB_SEG6;
> > +       opt->srhoff = (unsigned char *)srh - skb->data;
> 
> Should this offset be against skb->head, in case other data move
> operations could occur?

I copied the idea from get_srh(). It does:

srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

So i'm just undoing it.

> Also, what happens if the header was in a frags that was pulled by
> pskb_may_pull in seg6_get_srh.

Yes, i checked that. Because the skb has been cloned, if it needs to
rearrange the packet because it goes over a fragment boundary,
pskb_may_pull() will return false. And then we won't find the
SRH. Nothing bad happens, traceroute is still broken as before.  What
is a typical fragment size? We basically need a MAC header, IPv6
header, ICMP Header and another IP header. 14 + 40 + 8 + 40. Plus the
SRH headers. So if 128 byte fragments are being used, then yes, it
could be an issue. But is that realistic? It seems more likely 1K, 2K
or 4K fragments are used?

> If we can expect headers to exist in the linear segment, then perhaps
> the whole code can be simplified and the clone can be avoided.

It will require seg6_get_srh() to be re-written so that you can tell
it to look at a nested IP header. Which actually means ipv6_find_hdr()
needs re-writing. Things like the helper ipv6_hdr(skb) point to the
ICMP packet IP header, not the invoking IP packet header inside the
ICMP packet. I didn't like the idea of such a rewrite.

	Andrew
Willem de Bruijn Dec. 1, 2021, 6:22 p.m. UTC | #3
> > > +static void icmpv6_notify_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
> > > +{
> > > +       struct sk_buff *skb_orig;
> > > +       struct ipv6_sr_hdr *srh;
> > > +
> > > +       skb_orig = skb_clone(skb, GFP_ATOMIC);
> > > +       if (!skb_orig)
> > > +               return;
> >
> > Is this to be allowed to write to skb->cb? Or because seg6_get_srh
> > calls pskb_may_pull to parse the headers?
>
> This is an ICMP error message. So we have an IP packet, skb, which
> contains in the message body the IP packet which invoked the error. If
> we pass skb to seg6_get_srh() it will look in the received ICMP
> packet. But we actually want to find the SRH in the packet which
> invoked the error, the one which is in the message body. So the code
> makes a clone of the skb, and then updates the pointers so that it
> points to the invoking packet within the ICMP packet. Then we can use
> seg6_get_srh() on this inner packet, since it just looks like an
> ordinary IP packet.

Ah of course. I clearly did not appreciate the importance of that
skb_reset_network_header.

> > It is unlikely (not impossible) in this path for the packet to be
> > shared or cloned. Avoid this operation when it isn't? Most packets
> > will not actually have segment routing, so this imposes significant
> > cost on the common case (if in the not common ICMP processing path).
> >
> > nit: I found the name skb_orig confusing, as it is not in the meaning
> > of preserve the original skb as at function entry.
>
> skb_invoking? That seems to be the ICMP terminology?

Sounds good, thanks.

> > > +       skb_dst_drop(skb_orig);
> > > +       skb_reset_network_header(skb_orig);
> > > +
> > > +       srh = seg6_get_srh(skb_orig, 0);
> > > +       if (!srh)
> > > +               goto out;
> > > +
> > > +       if (srh->type != IPV6_SRCRT_TYPE_4)
> > > +               goto out;
> > > +
> > > +       opt->flags |= IP6SKB_SEG6;
> > > +       opt->srhoff = (unsigned char *)srh - skb->data;
> >
> > Should this offset be against skb->head, in case other data move
> > operations could occur?
>
> I copied the idea from get_srh(). It does:
>
> srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
>
> So i'm just undoing it.
>
> > Also, what happens if the header was in a frags that was pulled by
> > pskb_may_pull in seg6_get_srh.
>
> Yes, i checked that. Because the skb has been cloned, if it needs to
> rearrange the packet because it goes over a fragment boundary,
> pskb_may_pull() will return false. And then we won't find the
> SRH.

Great. So the feature only works if the SRH is in the linear header.

Then if the packet is not shared, you can just temporarily reset the
network header and revert it after?

> Nothing bad happens, traceroute is still broken as before.  What
> is a typical fragment size?

The question here is not the size in frags[], but that of the linear
section. This is really device driver and mtu specific. For many
devices and 1500 B mtu, the entire packet in linear seems quite
likely.
Andrew Lunn Dec. 1, 2021, 7:03 p.m. UTC | #4
On Wed, Dec 01, 2021 at 10:22:38AM -0800, Willem de Bruijn wrote:
> > > > +static void icmpv6_notify_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
> > > > +{
> > > > +       struct sk_buff *skb_orig;
> > > > +       struct ipv6_sr_hdr *srh;
> > > > +
> > > > +       skb_orig = skb_clone(skb, GFP_ATOMIC);
> > > > +       if (!skb_orig)
> > > > +               return;
> > >
> > > Is this to be allowed to write to skb->cb? Or because seg6_get_srh
> > > calls pskb_may_pull to parse the headers?
> >
> > This is an ICMP error message. So we have an IP packet, skb, which
> > contains in the message body the IP packet which invoked the error. If
> > we pass skb to seg6_get_srh() it will look in the received ICMP
> > packet. But we actually want to find the SRH in the packet which
> > invoked the error, the one which is in the message body. So the code
> > makes a clone of the skb, and then updates the pointers so that it
> > points to the invoking packet within the ICMP packet. Then we can use
> > seg6_get_srh() on this inner packet, since it just looks like an
> > ordinary IP packet.
> 
> Ah of course. I clearly did not appreciate the importance of that
> skb_reset_network_header.

So i should probably add a comment here. If we stick with this design.

> > Yes, i checked that. Because the skb has been cloned, if it needs to
> > rearrange the packet because it goes over a fragment boundary,
> > pskb_may_pull() will return false. And then we won't find the
> > SRH.
> 
> Great. So the feature only works if the SRH is in the linear header.

Yes, traceroute will remain broken if the invoking SRH header is not
in the linear header.

> Then if the packet is not shared, you can just temporarily reset the
> network header and revert it after?

Maybe. I was worried about any side effects of such an
operation. Working on a clone seemed a lot less risky.

Is it safe to do such games with the network header?

	Andrew
Willem de Bruijn Dec. 1, 2021, 7:19 p.m. UTC | #5
> > Then if the packet is not shared, you can just temporarily reset the
> > network header and revert it after?
>
> Maybe. I was worried about any side effects of such an
> operation. Working on a clone seemed a lot less risky.
>
> Is it safe to do such games with the network header?

As long as nothing else is accessing the skb, so only if it is not shared.

Packet sockets do similar temporary modifications, for one example.
See drop_n_restore in packet_rcv.
diff mbox series

Patch

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 20c1f968da7c..d8ab5022d397 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -133,6 +133,7 @@  struct inet6_skb_parm {
 	__u16			dsthao;
 #endif
 	__u16			frag_max_size;
+	__u16			srhoff;
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
@@ -142,6 +143,7 @@  struct inet6_skb_parm {
 #define IP6SKB_HOPBYHOP        32
 #define IP6SKB_L3SLAVE         64
 #define IP6SKB_JUMBOGRAM      128
+#define IP6SKB_SEG6	      512
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a7c31ab67c5d..315787b79f29 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@ 
 #include <net/protocol.h>
 #include <net/raw.h>
 #include <net/rawv6.h>
+#include <net/seg6.h>
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
@@ -818,9 +819,40 @@  static void icmpv6_echo_reply(struct sk_buff *skb)
 	local_bh_enable();
 }
 
+/* Check the invoking packet for a type 4 routing header (SRH), working on
+ * a clone whose network header has been reset. If one is found, set
+ * IP6SKB_SEG6 in opt->flags and store the SRH offset in opt->srhoff.
+ */
+static void icmpv6_notify_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
+{
+	struct sk_buff *skb_orig;
+	struct ipv6_sr_hdr *srh;
+
+	skb_orig = skb_clone(skb, GFP_ATOMIC);
+	if (!skb_orig)
+		return;
+
+	skb_dst_drop(skb_orig);
+	skb_reset_network_header(skb_orig);
+
+	srh = seg6_get_srh(skb_orig, 0);
+	if (!srh)
+		goto out;
+
+	if (srh->type != IPV6_SRCRT_TYPE_4)
+		goto out;
+
+	opt->flags |= IP6SKB_SEG6;
+	opt->srhoff = (unsigned char *)srh - skb->data;
+
+out:
+	kfree_skb(skb_orig);
+}
+
 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	const struct inet6_protocol *ipprot;
+	struct inet6_skb_parm *opt = IP6CB(skb);
 	int inner_offset;
 	__be16 frag_off;
 	u8 nexthdr;
@@ -829,6 +861,8 @@  void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
+	icmpv6_notify_srh(skb, opt);
+
 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 	if (ipv6_ext_hdr(nexthdr)) {
 		/* now skip over extension headers */
@@ -853,7 +887,7 @@  void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 
 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot && ipprot->err_handler)
-		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
 
 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 	return;