diff mbox series

[net-next,v2,01/12] flow_dissector: Parse ETH_P_TEB and move out of GRE

Message ID 20240815214527.2100137-2-tom@herbertland.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series flow_dissector: Dissect UDP encapsulation protocols | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 29 this patch: 29
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: pabeni@redhat.com ast@fiberby.net dcaratti@redhat.com
netdev/build_clang success Errors and warnings before: 29 this patch: 29
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 30 this patch: 30
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 99 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 3 this patch: 3
netdev/source_inline success Was 0 now: 0

Commit Message

Tom Herbert Aug. 15, 2024, 9:45 p.m. UTC
ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry
a plain Etherent frame. Add case in skb_flow_dissect to parse
packets of this type

If the GRE protocol is ETH_P_TEB then just process that as any
another EtherType since it's now supported in the main loop

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 net/core/flow_dissector.c | 57 +++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 26 deletions(-)

Comments

Willem de Bruijn Aug. 16, 2024, 6:54 p.m. UTC | #1
Tom Herbert wrote:
> ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry
> a plain Etherent frame. Add case in skb_flow_dissect to parse
> packets of this type
> 
> If the GRE protocol is ETH_P_TEB then just process that as any
> another EtherType since it's now supported in the main loop
> 
> Signed-off-by: Tom Herbert <tom@herbertland.com>

Reviewed-by: Willem de Bruijn <willemb@google.com>

> -	if (gre_ver == 0) {
> -		if (*p_proto == htons(ETH_P_TEB)) {
> -			const struct ethhdr *eth;
> -			struct ethhdr _eth;
> -
> -			eth = __skb_header_pointer(skb, *p_nhoff + offset,
> -						   sizeof(_eth),
> -						   data, *p_hlen, &_eth);
> -			if (!eth)
> -				return FLOW_DISSECT_RET_OUT_BAD;
> -			*p_proto = eth->h_proto;
> -			offset += sizeof(*eth);
> -
> -			/* Cap headers that we access via pointers at the
> -			 * end of the Ethernet header as our maximum alignment
> -			 * at that point is only 2 bytes.
> -			 */
> -			if (NET_IP_ALIGN)
> -				*p_hlen = *p_nhoff + offset;
> -		}
> -	} else { /* version 1, must be PPTP */

> @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net,
>  
>  		break;
>  	}
> +	case htons(ETH_P_TEB): {
> +		const struct ethhdr *eth;
> +		struct ethhdr _eth;
> +
> +		eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
> +					   data, hlen, &_eth);
> +		if (!eth)
> +			goto out_bad;
> +
> +		proto = eth->h_proto;
> +		nhoff += sizeof(*eth);
> +
> +		/* Cap headers that we access via pointers at the
> +		 * end of the Ethernet header as our maximum alignment
> +		 * at that point is only 2 bytes.
> +		 */
> +		if (NET_IP_ALIGN)
> +			hlen = nhoff;

I wonder why this exists. But besides the point of this move.

> +
> +		goto proto_again;
> +	}
Tom Herbert Aug. 20, 2024, 4:21 p.m. UTC | #2
On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Tom Herbert wrote:
> > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry
> > a plain Etherent frame. Add case in skb_flow_dissect to parse
> > packets of this type
> >
> > If the GRE protocol is ETH_P_TEB then just process that as any
> > another EtherType since it's now supported in the main loop
> >
> > Signed-off-by: Tom Herbert <tom@herbertland.com>
>
> Reviewed-by: Willem de Bruijn <willemb@google.com>
>
> > -     if (gre_ver == 0) {
> > -             if (*p_proto == htons(ETH_P_TEB)) {
> > -                     const struct ethhdr *eth;
> > -                     struct ethhdr _eth;
> > -
> > -                     eth = __skb_header_pointer(skb, *p_nhoff + offset,
> > -                                                sizeof(_eth),
> > -                                                data, *p_hlen, &_eth);
> > -                     if (!eth)
> > -                             return FLOW_DISSECT_RET_OUT_BAD;
> > -                     *p_proto = eth->h_proto;
> > -                     offset += sizeof(*eth);
> > -
> > -                     /* Cap headers that we access via pointers at the
> > -                      * end of the Ethernet header as our maximum alignment
> > -                      * at that point is only 2 bytes.
> > -                      */
> > -                     if (NET_IP_ALIGN)
> > -                             *p_hlen = *p_nhoff + offset;
> > -             }
> > -     } else { /* version 1, must be PPTP */
>
> > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net,
> >
> >               break;
> >       }
> > +     case htons(ETH_P_TEB): {
> > +             const struct ethhdr *eth;
> > +             struct ethhdr _eth;
> > +
> > +             eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
> > +                                        data, hlen, &_eth);
> > +             if (!eth)
> > +                     goto out_bad;
> > +
> > +             proto = eth->h_proto;
> > +             nhoff += sizeof(*eth);
> > +
> > +             /* Cap headers that we access via pointers at the
> > +              * end of the Ethernet header as our maximum alignment
> > +              * at that point is only 2 bytes.
> > +              */
> > +             if (NET_IP_ALIGN)
> > +                     hlen = nhoff;
>
> I wonder why this exists. But besides the point of this move.

Willem,

Ethernet header breaks 4-byte alignment of encapsulated protocols
since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures
that don't like unaligned loads.

Tom

>
> > +
> > +             goto proto_again;
> > +     }
>
Willem de Bruijn Aug. 20, 2024, 4:30 p.m. UTC | #3
Tom Herbert wrote:
> On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > Tom Herbert wrote:
> > > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry
> > > a plain Etherent frame. Add case in skb_flow_dissect to parse
> > > packets of this type
> > >
> > > If the GRE protocol is ETH_P_TEB then just process that as any
> > > another EtherType since it's now supported in the main loop
> > >
> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
> >
> > Reviewed-by: Willem de Bruijn <willemb@google.com>
> >
> > > -     if (gre_ver == 0) {
> > > -             if (*p_proto == htons(ETH_P_TEB)) {
> > > -                     const struct ethhdr *eth;
> > > -                     struct ethhdr _eth;
> > > -
> > > -                     eth = __skb_header_pointer(skb, *p_nhoff + offset,
> > > -                                                sizeof(_eth),
> > > -                                                data, *p_hlen, &_eth);
> > > -                     if (!eth)
> > > -                             return FLOW_DISSECT_RET_OUT_BAD;
> > > -                     *p_proto = eth->h_proto;
> > > -                     offset += sizeof(*eth);
> > > -
> > > -                     /* Cap headers that we access via pointers at the
> > > -                      * end of the Ethernet header as our maximum alignment
> > > -                      * at that point is only 2 bytes.
> > > -                      */
> > > -                     if (NET_IP_ALIGN)
> > > -                             *p_hlen = *p_nhoff + offset;
> > > -             }
> > > -     } else { /* version 1, must be PPTP */
> >
> > > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net,
> > >
> > >               break;
> > >       }
> > > +     case htons(ETH_P_TEB): {
> > > +             const struct ethhdr *eth;
> > > +             struct ethhdr _eth;
> > > +
> > > +             eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
> > > +                                        data, hlen, &_eth);
> > > +             if (!eth)
> > > +                     goto out_bad;
> > > +
> > > +             proto = eth->h_proto;
> > > +             nhoff += sizeof(*eth);
> > > +
> > > +             /* Cap headers that we access via pointers at the
> > > +              * end of the Ethernet header as our maximum alignment
> > > +              * at that point is only 2 bytes.
> > > +              */
> > > +             if (NET_IP_ALIGN)
> > > +                     hlen = nhoff;
> >
> > I wonder why this exists. But besides the point of this move.
> 
> Willem,
> 
> Ethernet header breaks 4-byte alignment of encapsulated protocols
> since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures
> that don't like unaligned loads.

I understand how NET_IP_ALIGN is used by drivers.

I don't understand its use here in the flow dissector. Why is hlen
capped if it is set?
Tom Herbert Aug. 20, 2024, 4:42 p.m. UTC | #4
On Tue, Aug 20, 2024 at 9:30 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Tom Herbert wrote:
> > On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn
> > <willemdebruijn.kernel@gmail.com> wrote:
> > >
> > > Tom Herbert wrote:
> > > > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry
> > > > a plain Etherent frame. Add case in skb_flow_dissect to parse
> > > > packets of this type
> > > >
> > > > If the GRE protocol is ETH_P_TEB then just process that as any
> > > > another EtherType since it's now supported in the main loop
> > > >
> > > > Signed-off-by: Tom Herbert <tom@herbertland.com>
> > >
> > > Reviewed-by: Willem de Bruijn <willemb@google.com>
> > >
> > > > -     if (gre_ver == 0) {
> > > > -             if (*p_proto == htons(ETH_P_TEB)) {
> > > > -                     const struct ethhdr *eth;
> > > > -                     struct ethhdr _eth;
> > > > -
> > > > -                     eth = __skb_header_pointer(skb, *p_nhoff + offset,
> > > > -                                                sizeof(_eth),
> > > > -                                                data, *p_hlen, &_eth);
> > > > -                     if (!eth)
> > > > -                             return FLOW_DISSECT_RET_OUT_BAD;
> > > > -                     *p_proto = eth->h_proto;
> > > > -                     offset += sizeof(*eth);
> > > > -
> > > > -                     /* Cap headers that we access via pointers at the
> > > > -                      * end of the Ethernet header as our maximum alignment
> > > > -                      * at that point is only 2 bytes.
> > > > -                      */
> > > > -                     if (NET_IP_ALIGN)
> > > > -                             *p_hlen = *p_nhoff + offset;
> > > > -             }
> > > > -     } else { /* version 1, must be PPTP */
> > >
> > > > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net,
> > > >
> > > >               break;
> > > >       }
> > > > +     case htons(ETH_P_TEB): {
> > > > +             const struct ethhdr *eth;
> > > > +             struct ethhdr _eth;
> > > > +
> > > > +             eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
> > > > +                                        data, hlen, &_eth);
> > > > +             if (!eth)
> > > > +                     goto out_bad;
> > > > +
> > > > +             proto = eth->h_proto;
> > > > +             nhoff += sizeof(*eth);
> > > > +
> > > > +             /* Cap headers that we access via pointers at the
> > > > +              * end of the Ethernet header as our maximum alignment
> > > > +              * at that point is only 2 bytes.
> > > > +              */
> > > > +             if (NET_IP_ALIGN)
> > > > +                     hlen = nhoff;
> > >
> > > I wonder why this exists. But besides the point of this move.
> >
> > Willem,
> >
> > Ethernet header breaks 4-byte alignment of encapsulated protocols
> > since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures
> > that don't like unaligned loads.
>
> I understand how NET_IP_ALIGN is used by drivers.
>
> I don't understand its use here in the flow dissector. Why is hlen
> capped if it is set?

Willem,

For the real Ethernet header the receive skbuf is offset by two so
that device places the packet such that the Ethernet payload, i.e. IP
header, is aligned to four bytes (14+2=16 which will be offset of IP
header). When a packets contains an encapsulated Ethernet header, the
offset of the header is aligned to four bytes which means the payload
of that Ethernet header, i.e. an encapsulated IP header, is not four
byte aligned and neither are any subsequent headers (TCP, UDP, etc.).
On some architectures, performing unaligned loads is expensive
compared to aligned loads, so hlen is being capped here to avoid
having flow dissector do that on unaligned headers after the Ethernet
header. It's a tradeoff between performance and deeper flow
dissection.

Tom
Willem de Bruijn Aug. 20, 2024, 5:43 p.m. UTC | #5
> > > > > +             /* Cap headers that we access via pointers at the
> > > > > +              * end of the Ethernet header as our maximum alignment
> > > > > +              * at that point is only 2 bytes.
> > > > > +              */
> > > > > +             if (NET_IP_ALIGN)
> > > > > +                     hlen = nhoff;
> > > >
> > > > I wonder why this exists. But besides the point of this move.
> > >
> > > Willem,
> > >
> > > Ethernet header breaks 4-byte alignment of encapsulated protocols
> > > since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures
> > > that don't like unaligned loads.
> >
> > I understand how NET_IP_ALIGN is used by drivers.
> >
> > I don't understand its use here in the flow dissector. Why is hlen
> > capped if it is set?
> 
> Willem,
> 
> For the real Ethernet header the receive skbuf is offset by two so
> that device places the packet such that the Ethernet payload, i.e. IP
> header, is aligned to four bytes (14+2=16 which will be offset of IP
> header). When a packets contains an encapsulated Ethernet header, the
> offset of the header is aligned to four bytes which means the payload
> of that Ethernet header, i.e. an encapsulated IP header, is not four
> byte aligned and neither are any subsequent headers (TCP, UDP, etc.).

> On some architectures, performing unaligned loads is expensive
> compared to aligned loads, so hlen is being capped here to avoid
> having flow dissector do that on unaligned headers after the Ethernet
> header. It's a tradeoff between performance and deeper flow
> dissection.

Thanks Tom. That explains.

So flow dissector behavior differs in this subtle way depending on
platform.

Maybe this is a good opportunity to add a comment. This thread alone
also already documents it to some extent.
diff mbox series

Patch

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0e638a37aa09..4b116119086a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -654,7 +654,7 @@  __skb_flow_dissect_gre(const struct sk_buff *skb,
 		       struct flow_dissector_key_control *key_control,
 		       struct flow_dissector *flow_dissector,
 		       void *target_container, const void *data,
-		       __be16 *p_proto, int *p_nhoff, int *p_hlen,
+		       __be16 *p_proto, int *p_nhoff, int hlen,
 		       unsigned int flags)
 {
 	struct flow_dissector_key_keyid *key_keyid;
@@ -663,7 +663,7 @@  __skb_flow_dissect_gre(const struct sk_buff *skb,
 	u16 gre_ver;
 
 	hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
-				   data, *p_hlen, &_hdr);
+				   data, hlen, &_hdr);
 	if (!hdr)
 		return FLOW_DISSECT_RET_OUT_BAD;
 
@@ -695,7 +695,7 @@  __skb_flow_dissect_gre(const struct sk_buff *skb,
 
 		keyid = __skb_header_pointer(skb, *p_nhoff + offset,
 					     sizeof(_keyid),
-					     data, *p_hlen, &_keyid);
+					     data, hlen, &_keyid);
 		if (!keyid)
 			return FLOW_DISSECT_RET_OUT_BAD;
 
@@ -715,27 +715,11 @@  __skb_flow_dissect_gre(const struct sk_buff *skb,
 	if (hdr->flags & GRE_SEQ)
 		offset += sizeof_field(struct pptp_gre_header, seq);
 
-	if (gre_ver == 0) {
-		if (*p_proto == htons(ETH_P_TEB)) {
-			const struct ethhdr *eth;
-			struct ethhdr _eth;
-
-			eth = __skb_header_pointer(skb, *p_nhoff + offset,
-						   sizeof(_eth),
-						   data, *p_hlen, &_eth);
-			if (!eth)
-				return FLOW_DISSECT_RET_OUT_BAD;
-			*p_proto = eth->h_proto;
-			offset += sizeof(*eth);
-
-			/* Cap headers that we access via pointers at the
-			 * end of the Ethernet header as our maximum alignment
-			 * at that point is only 2 bytes.
-			 */
-			if (NET_IP_ALIGN)
-				*p_hlen = *p_nhoff + offset;
-		}
-	} else { /* version 1, must be PPTP */
+	/* For GRE version 0 p_proto is already correctly set (including if
+	 * it is ETH_P_TEB)
+	 */
+
+	if (gre_ver == 1) { /* Version 1 is PPP */
 		u8 _ppp_hdr[PPP_HDRLEN];
 		u8 *ppp_hdr;
 
@@ -744,7 +728,7 @@  __skb_flow_dissect_gre(const struct sk_buff *skb,
 
 		ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
 					       sizeof(_ppp_hdr),
-					       data, *p_hlen, _ppp_hdr);
+					       data, hlen, _ppp_hdr);
 		if (!ppp_hdr)
 			return FLOW_DISSECT_RET_OUT_BAD;
 
@@ -1284,6 +1268,27 @@  bool __skb_flow_dissect(const struct net *net,
 
 		break;
 	}
+	case htons(ETH_P_TEB): {
+		const struct ethhdr *eth;
+		struct ethhdr _eth;
+
+		eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
+					   data, hlen, &_eth);
+		if (!eth)
+			goto out_bad;
+
+		proto = eth->h_proto;
+		nhoff += sizeof(*eth);
+
+		/* Cap headers that we access via pointers at the
+		 * end of the Ethernet header as our maximum alignment
+		 * at that point is only 2 bytes.
+		 */
+		if (NET_IP_ALIGN)
+			hlen = nhoff;
+
+		goto proto_again;
+	}
 	case htons(ETH_P_8021AD):
 	case htons(ETH_P_8021Q): {
 		const struct vlan_hdr *vlan = NULL;
@@ -1531,7 +1536,7 @@  bool __skb_flow_dissect(const struct net *net,
 
 		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
 					       target_container, data,
-					       &proto, &nhoff, &hlen, flags);
+					       &proto, &nhoff, hlen, flags);
 		break;
 
 	case NEXTHDR_HOP: