Message ID | 20240815214527.2100137-2-tom@herbertland.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | flow_dissector: Dissect UDP encapsulation protocols | expand |
Tom Herbert wrote: > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry > a plain Etherent frame. Add case in skb_flow_dissect to parse > packets of this type > > If the GRE protocol is ETH_P_TEB then just process that as any > another EtherType since it's now supported in the main loop > > Signed-off-by: Tom Herbert <tom@herbertland.com> Reviewed-by: Willem de Bruijn <willemb@google.com> > - if (gre_ver == 0) { > - if (*p_proto == htons(ETH_P_TEB)) { > - const struct ethhdr *eth; > - struct ethhdr _eth; > - > - eth = __skb_header_pointer(skb, *p_nhoff + offset, > - sizeof(_eth), > - data, *p_hlen, &_eth); > - if (!eth) > - return FLOW_DISSECT_RET_OUT_BAD; > - *p_proto = eth->h_proto; > - offset += sizeof(*eth); > - > - /* Cap headers that we access via pointers at the > - * end of the Ethernet header as our maximum alignment > - * at that point is only 2 bytes. > - */ > - if (NET_IP_ALIGN) > - *p_hlen = *p_nhoff + offset; > - } > - } else { /* version 1, must be PPTP */ > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net, > > break; > } > + case htons(ETH_P_TEB): { > + const struct ethhdr *eth; > + struct ethhdr _eth; > + > + eth = __skb_header_pointer(skb, nhoff, sizeof(_eth), > + data, hlen, &_eth); > + if (!eth) > + goto out_bad; > + > + proto = eth->h_proto; > + nhoff += sizeof(*eth); > + > + /* Cap headers that we access via pointers at the > + * end of the Ethernet header as our maximum alignment > + * at that point is only 2 bytes. > + */ > + if (NET_IP_ALIGN) > + hlen = nhoff; I wonder why this exists. But besides the point of this move. > + > + goto proto_again; > + }
On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Tom Herbert wrote: > > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry > > a plain Etherent frame. Add case in skb_flow_dissect to parse > > packets of this type > > > > If the GRE protocol is ETH_P_TEB then just process that as any > > another EtherType since it's now supported in the main loop > > > > Signed-off-by: Tom Herbert <tom@herbertland.com> > > Reviewed-by: Willem de Bruijn <willemb@google.com> > > > - if (gre_ver == 0) { > > - if (*p_proto == htons(ETH_P_TEB)) { > > - const struct ethhdr *eth; > > - struct ethhdr _eth; > > - > > - eth = __skb_header_pointer(skb, *p_nhoff + offset, > > - sizeof(_eth), > > - data, *p_hlen, &_eth); > > - if (!eth) > > - return FLOW_DISSECT_RET_OUT_BAD; > > - *p_proto = eth->h_proto; > > - offset += sizeof(*eth); > > - > > - /* Cap headers that we access via pointers at the > > - * end of the Ethernet header as our maximum alignment > > - * at that point is only 2 bytes. > > - */ > > - if (NET_IP_ALIGN) > > - *p_hlen = *p_nhoff + offset; > > - } > > - } else { /* version 1, must be PPTP */ > > > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net, > > > > break; > > } > > + case htons(ETH_P_TEB): { > > + const struct ethhdr *eth; > > + struct ethhdr _eth; > > + > > + eth = __skb_header_pointer(skb, nhoff, sizeof(_eth), > > + data, hlen, &_eth); > > + if (!eth) > > + goto out_bad; > > + > > + proto = eth->h_proto; > > + nhoff += sizeof(*eth); > > + > > + /* Cap headers that we access via pointers at the > > + * end of the Ethernet header as our maximum alignment > > + * at that point is only 2 bytes. > > + */ > > + if (NET_IP_ALIGN) > > + hlen = nhoff; > > I wonder why this exists. But besides the point of this move. Willem, Ethernet header breaks 4-byte alignment of encapsulated protocols since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures that don't like unaligned loads. Tom > > > + > > + goto proto_again; > > + } >
Tom Herbert wrote: > On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > Tom Herbert wrote: > > > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry > > > a plain Etherent frame. Add case in skb_flow_dissect to parse > > > packets of this type > > > > > > If the GRE protocol is ETH_P_TEB then just process that as any > > > another EtherType since it's now supported in the main loop > > > > > > Signed-off-by: Tom Herbert <tom@herbertland.com> > > > > Reviewed-by: Willem de Bruijn <willemb@google.com> > > > > > - if (gre_ver == 0) { > > > - if (*p_proto == htons(ETH_P_TEB)) { > > > - const struct ethhdr *eth; > > > - struct ethhdr _eth; > > > - > > > - eth = __skb_header_pointer(skb, *p_nhoff + offset, > > > - sizeof(_eth), > > > - data, *p_hlen, &_eth); > > > - if (!eth) > > > - return FLOW_DISSECT_RET_OUT_BAD; > > > - *p_proto = eth->h_proto; > > > - offset += sizeof(*eth); > > > - > > > - /* Cap headers that we access via pointers at the > > > - * end of the Ethernet header as our maximum alignment > > > - * at that point is only 2 bytes. > > > - */ > > > - if (NET_IP_ALIGN) > > > - *p_hlen = *p_nhoff + offset; > > > - } > > > - } else { /* version 1, must be PPTP */ > > > > > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net, > > > > > > break; > > > } > > > + case htons(ETH_P_TEB): { > > > + const struct ethhdr *eth; > > > + struct ethhdr _eth; > > > + > > > + eth = __skb_header_pointer(skb, nhoff, sizeof(_eth), > > > + data, hlen, &_eth); > > > + if (!eth) > > > + goto out_bad; > > > + > > > + proto = eth->h_proto; > > > + nhoff += sizeof(*eth); > > > + > > > + /* Cap headers that we access via pointers at the > > > + * end of the Ethernet header as our maximum alignment > > > + * at that point is only 2 bytes. > > > + */ > > > + if (NET_IP_ALIGN) > > > + hlen = nhoff; > > > > I wonder why this exists. But besides the point of this move. > > Willem, > > Ethernet header breaks 4-byte alignment of encapsulated protocols > since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures > that don't like unaligned loads. I understand how NET_IP_ALIGN is used by drivers. I don't understand its use here in the flow dissector. Why is hlen capped if it is set?
On Tue, Aug 20, 2024 at 9:30 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Tom Herbert wrote: > > On Fri, Aug 16, 2024 at 11:54 AM Willem de Bruijn > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > Tom Herbert wrote: > > > > ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry > > > > a plain Etherent frame. Add case in skb_flow_dissect to parse > > > > packets of this type > > > > > > > > If the GRE protocol is ETH_P_TEB then just process that as any > > > > another EtherType since it's now supported in the main loop > > > > > > > > Signed-off-by: Tom Herbert <tom@herbertland.com> > > > > > > Reviewed-by: Willem de Bruijn <willemb@google.com> > > > > > > > - if (gre_ver == 0) { > > > > - if (*p_proto == htons(ETH_P_TEB)) { > > > > - const struct ethhdr *eth; > > > > - struct ethhdr _eth; > > > > - > > > > - eth = __skb_header_pointer(skb, *p_nhoff + offset, > > > > - sizeof(_eth), > > > > - data, *p_hlen, &_eth); > > > > - if (!eth) > > > > - return FLOW_DISSECT_RET_OUT_BAD; > > > > - *p_proto = eth->h_proto; > > > > - offset += sizeof(*eth); > > > > - > > > > - /* Cap headers that we access via pointers at the > > > > - * end of the Ethernet header as our maximum alignment > > > > - * at that point is only 2 bytes. > > > > - */ > > > > - if (NET_IP_ALIGN) > > > > - *p_hlen = *p_nhoff + offset; > > > > - } > > > > - } else { /* version 1, must be PPTP */ > > > > > > > @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net, > > > > > > > > break; > > > > } > > > > + case htons(ETH_P_TEB): { > > > > + const struct ethhdr *eth; > > > > + struct ethhdr _eth; > > > > + > > > > + eth = __skb_header_pointer(skb, nhoff, sizeof(_eth), > > > > + data, hlen, &_eth); > > > > + if (!eth) > > > > + goto out_bad; > > > > + > > > > + proto = eth->h_proto; > > > > + nhoff += sizeof(*eth); > > > > + > > > > + /* Cap headers that we access via pointers at the > > > > + * end of the Ethernet header as our maximum alignment > > > > + * at that point is only 2 bytes. > > > > + */ > > > > + if (NET_IP_ALIGN) > > > > + hlen = nhoff; > > > > > > I wonder why this exists. But besides the point of this move. > > > > Willem, > > > > Ethernet header breaks 4-byte alignment of encapsulated protocols > > since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures > > that don't like unaligned loads. > > I understand how NET_IP_ALIGN is used by drivers. > > I don't understand its use here in the flow dissector. Why is hlen > capped if it is set? Willem, For the real Ethernet header the receive skbuf is offset by two so that device places the packet such that the Ethernet payload, i.e. IP header, is aligned to four bytes (14+2=16 which will be offset of IP header). When a packets contains an encapsulated Ethernet header, the offset of the header is aligned to four bytes which means the payload of that Ethernet header, i.e. an encapsulated IP header, is not four byte aligned and neither are any subsequent headers (TCP, UDP, etc.). On some architectures, performing unaligned loads is expensive compared to aligned loads, so hlen is being capped here to avoid having flow dissector do that on unaligned headers after the Ethernet header. It's a tradeoff between performance and deeper flow dissection. Tom
> > > > > + /* Cap headers that we access via pointers at the > > > > > + * end of the Ethernet header as our maximum alignment > > > > > + * at that point is only 2 bytes. > > > > > + */ > > > > > + if (NET_IP_ALIGN) > > > > > + hlen = nhoff; > > > > > > > > I wonder why this exists. But besides the point of this move. > > > > > > Willem, > > > > > > Ethernet header breaks 4-byte alignment of encapsulated protocols > > > since it's 14 bytes, so the NET_IP_ALIGN can be used on architectures > > > that don't like unaligned loads. > > > > I understand how NET_IP_ALIGN is used by drivers. > > > > I don't understand its use here in the flow dissector. Why is hlen > > capped if it is set? > > Willem, > > For the real Ethernet header the receive skbuf is offset by two so > that device places the packet such that the Ethernet payload, i.e. IP > header, is aligned to four bytes (14+2=16 which will be offset of IP > header). When a packets contains an encapsulated Ethernet header, the > offset of the header is aligned to four bytes which means the payload > of that Ethernet header, i.e. an encapsulated IP header, is not four > byte aligned and neither are any subsequent headers (TCP, UDP, etc.). > On some architectures, performing unaligned loads is expensive > compared to aligned loads, so hlen is being capped here to avoid > having flow dissector do that on unaligned headers after the Ethernet > header. It's a tradeoff between performance and deeper flow > dissection. Thanks Tom. That explains. So flow dissector behavior differs in this subtle way depending on platform. Maybe this is a good opportunity to add a comment. This thread alone also already documents it to some extent.
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0e638a37aa09..4b116119086a 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -654,7 +654,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, struct flow_dissector *flow_dissector, void *target_container, const void *data, - __be16 *p_proto, int *p_nhoff, int *p_hlen, + __be16 *p_proto, int *p_nhoff, int hlen, unsigned int flags) { struct flow_dissector_key_keyid *key_keyid; @@ -663,7 +663,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, u16 gre_ver; hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), - data, *p_hlen, &_hdr); + data, hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; @@ -695,7 +695,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, keyid = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_keyid), - data, *p_hlen, &_keyid); + data, hlen, &_keyid); if (!keyid) return FLOW_DISSECT_RET_OUT_BAD; @@ -715,27 +715,11 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, if (hdr->flags & GRE_SEQ) offset += sizeof_field(struct pptp_gre_header, seq); - if (gre_ver == 0) { - if (*p_proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, *p_nhoff + offset, - sizeof(_eth), - data, *p_hlen, &_eth); - if (!eth) - return FLOW_DISSECT_RET_OUT_BAD; - *p_proto = eth->h_proto; - offset += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - *p_hlen = *p_nhoff + offset; - } - } else { /* version 1, must be PPTP */ + /* For GRE version 0 p_proto is already correctly set (including if + * it is ETH_P_TEB) + */ + + if (gre_ver == 1) { /* Version 1 is PPP */ u8 _ppp_hdr[PPP_HDRLEN]; u8 *ppp_hdr; @@ -744,7 +728,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, sizeof(_ppp_hdr), - data, *p_hlen, _ppp_hdr); + data, hlen, _ppp_hdr); if (!ppp_hdr) return FLOW_DISSECT_RET_OUT_BAD; @@ -1284,6 +1268,27 @@ bool __skb_flow_dissect(const struct net *net, break; } + case htons(ETH_P_TEB): { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff, sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + + proto = eth->h_proto; + nhoff += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = nhoff; + + goto proto_again; + } case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan = NULL; @@ -1531,7 +1536,7 @@ bool __skb_flow_dissect(const struct net *net, fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, target_container, data, - &proto, &nhoff, &hlen, flags); + &proto, &nhoff, hlen, flags); break; case NEXTHDR_HOP:
ETH_P_TEB (Trans Ether Bridging) is the EtherType to carry a plain Etherent frame. Add case in skb_flow_dissect to parse packets of this type If the GRE protocol is ETH_P_TEB then just process that as any another EtherType since it's now supported in the main loop Signed-off-by: Tom Herbert <tom@herbertland.com> --- net/core/flow_dissector.c | 57 +++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 26 deletions(-)