diff mbox series

[RFC,net-next,v3,1/4] flow_dissector: Add PPPoE dissectors

Message ID 20220629143859.209028-2-marcin.szycik@linux.intel.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series ice: PPPoE offload support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5647 this patch: 5647
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1211 this patch: 1211
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5796 this patch: 5796
netdev/checkpatch warning WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 7 this patch: 7
netdev/source_inline success Was 0 now: 0

Commit Message

Marcin Szycik June 29, 2022, 2:38 p.m. UTC
From: Wojciech Drewek <wojciech.drewek@intel.com>

Allow to dissect PPPoE specific fields which are:
- session ID (16 bits)
- ppp protocol (16 bits)

The goal is to make the following TC command possible:

  # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
      flower \
        pppoe_sid 12 \
        ppp_proto ip \
      action drop

Note that only PPPoE Session is supported.

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
---
v3: revert byte order changes in is_ppp_proto_supported from previous 
    version, add kernel-doc for is_ppp_proto_supported
v2: use ntohs instead of htons in is_ppp_proto_supported

 include/net/flow_dissector.h | 11 ++++++++
 net/core/flow_dissector.c    | 55 ++++++++++++++++++++++++++++++++----
 2 files changed, 60 insertions(+), 6 deletions(-)

Comments

Guillaume Nault June 30, 2022, 11:10 p.m. UTC | #1
On Wed, Jun 29, 2022 at 04:38:56PM +0200, Marcin Szycik wrote:
> From: Wojciech Drewek <wojciech.drewek@intel.com>
> 
> Allow to dissect PPPoE specific fields which are:
> - session ID (16 bits)
> - ppp protocol (16 bits)
> 
> The goal is to make the following TC command possible:
> 
>   # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
>       flower \
>         pppoe_sid 12 \
>         ppp_proto ip \
>       action drop
> 
> Note that only PPPoE Session is supported.
> 
> Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
> ---
> v3: revert byte order changes in is_ppp_proto_supported from previous 
>     version, add kernel-doc for is_ppp_proto_supported
> v2: use ntohs instead of htons in is_ppp_proto_supported
> 
>  include/net/flow_dissector.h | 11 ++++++++
>  net/core/flow_dissector.c    | 55 ++++++++++++++++++++++++++++++++----
>  2 files changed, 60 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index a4c6057c7097..8ff40c7c3f1c 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -261,6 +261,16 @@ struct flow_dissector_key_num_of_vlans {
>  	u8 num_of_vlans;
>  };
>  
> +/**
> + * struct flow_dissector_key_pppoe:
> + * @session_id: pppoe session id
> + * @ppp_proto: ppp protocol
> + */
> +struct flow_dissector_key_pppoe {
> +	u16 session_id;
> +	__be16 ppp_proto;
> +};

Why isn't session_id __be16 too?

Also, I'm not sure I like mixing the PPPoE session ID and PPP protocol
fields in the same structure: they're part of two different protocols.
However, I can't anticipate any technical problem in doing so, and I
guess there's no easy way to let the flow dissector parse the PPP
header independently. So well, maybe we don't have choice...

>  enum flow_dissector_key_id {
>  	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
>  	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
> @@ -291,6 +301,7 @@ enum flow_dissector_key_id {
>  	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
>  	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
>  	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
> +	FLOW_DISSECTOR_KEY_PPPOE,  /* struct flow_dissector_key_pppoe */
>  
>  	FLOW_DISSECTOR_KEY_MAX,
>  };
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 6aee04f75e3e..42393af477a2 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -895,6 +895,39 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
>  	return result == BPF_OK;
>  }
>  
> +/**
> + * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
> + * @proto: protocol type (PPP proto field)
> + */
> +static bool is_ppp_proto_supported(__be16 proto)
> +{
> +	switch (proto) {
> +	case htons(PPP_AT):
> +	case htons(PPP_IPX):
> +	case htons(PPP_VJC_COMP):
> +	case htons(PPP_VJC_UNCOMP):
> +	case htons(PPP_MP):
> +	case htons(PPP_COMPFRAG):
> +	case htons(PPP_COMP):
> +	case htons(PPP_MPLS_UC):
> +	case htons(PPP_MPLS_MC):
> +	case htons(PPP_IPCP):
> +	case htons(PPP_ATCP):
> +	case htons(PPP_IPXCP):
> +	case htons(PPP_IPV6CP):
> +	case htons(PPP_CCPFRAG):
> +	case htons(PPP_MPLSCP):
> +	case htons(PPP_LCP):
> +	case htons(PPP_PAP):
> +	case htons(PPP_LQR):
> +	case htons(PPP_CHAP):
> +	case htons(PPP_CBCP):
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
>  /**
>   * __skb_flow_dissect - extract the flow_keys struct and return it
>   * @net: associated network namespace, derived from @skb if NULL
> @@ -1221,19 +1254,29 @@ bool __skb_flow_dissect(const struct net *net,
>  		}
>  
>  		nhoff += PPPOE_SES_HLEN;
> -		switch (hdr->proto) {
> -		case htons(PPP_IP):
> +		if (hdr->proto == htons(PPP_IP)) {
>  			proto = htons(ETH_P_IP);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -			break;
> -		case htons(PPP_IPV6):
> +		} else if (hdr->proto == htons(PPP_IPV6)) {
>  			proto = htons(ETH_P_IPV6);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -			break;

1)
Looks like you could easily handle MPLS too. Did you skip it on
purpose? (not enough users to justify writing and maintaining
the code?).

I don't mean MPLS has to be supported; I'd just like to know if it was
considered.

2)
Also this whole test is a bit weak: the version, type and code fields
must have precise values for the PPPoE Session packet to be valid.
If either version or type is different than 1, then the packet
advertises a new version of the protocol that we don't know how to parse
(or most probably the packet was forged or corrupted). A non-zero code
is also invalid.

I know the code was already like this before your patch, but it's
probably better to fix it before implementing hardware offload.

3)
Finally, the PPP protocol could be compressed and stored in 1 byte
instead of 2. This case wasn't handled before your patch, but I think
that should be fixed too before implementing hardware offload.

> -		default:
> +		} else if (is_ppp_proto_supported(hdr->proto)) {
> +			fdret = FLOW_DISSECT_RET_OUT_GOOD;
> +		} else {
>  			fdret = FLOW_DISSECT_RET_OUT_BAD;
>  			break;
>  		}
> +
> +		if (dissector_uses_key(flow_dissector,
> +				       FLOW_DISSECTOR_KEY_PPPOE)) {
> +			struct flow_dissector_key_pppoe *key_pppoe;
> +
> +			key_pppoe = skb_flow_dissector_target(flow_dissector,
> +							      FLOW_DISSECTOR_KEY_PPPOE,
> +							      target_container);
> +			key_pppoe->session_id = ntohs(hdr->hdr.sid);
> +			key_pppoe->ppp_proto = hdr->proto;
> +		}
>  		break;
>  	}
>  	case htons(ETH_P_TIPC): {
> -- 
> 2.35.1
>
Wojciech Drewek July 1, 2022, 10:53 a.m. UTC | #2
Hi Guillaume,

Thanks for the review!

> -----Original Message-----
> From: Guillaume Nault <gnault@redhat.com>
> Sent: piątek, 1 lipca 2022 01:10
> To: Marcin Szycik <marcin.szycik@linux.intel.com>
> Cc: netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; davem@davemloft.net;
> xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Drewek,
> Wojciech <wojciech.drewek@intel.com>; Lobakin, Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net;
> paulus@samba.org
> Subject: Re: [RFC PATCH net-next v3 1/4] flow_dissector: Add PPPoE dissectors
> 
> On Wed, Jun 29, 2022 at 04:38:56PM +0200, Marcin Szycik wrote:
> > From: Wojciech Drewek <wojciech.drewek@intel.com>
> >
> > Allow to dissect PPPoE specific fields which are:
> > - session ID (16 bits)
> > - ppp protocol (16 bits)
> >
> > The goal is to make the following TC command possible:
> >
> >   # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
> >       flower \
> >         pppoe_sid 12 \
> >         ppp_proto ip \
> >       action drop
> >
> > Note that only PPPoE Session is supported.
> >
> > Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
> > ---
> > v3: revert byte order changes in is_ppp_proto_supported from previous
> >     version, add kernel-doc for is_ppp_proto_supported
> > v2: use ntohs instead of htons in is_ppp_proto_supported
> >
> >  include/net/flow_dissector.h | 11 ++++++++
> >  net/core/flow_dissector.c    | 55 ++++++++++++++++++++++++++++++++----
> >  2 files changed, 60 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > index a4c6057c7097..8ff40c7c3f1c 100644
> > --- a/include/net/flow_dissector.h
> > +++ b/include/net/flow_dissector.h
> > @@ -261,6 +261,16 @@ struct flow_dissector_key_num_of_vlans {
> >  	u8 num_of_vlans;
> >  };
> >
> > +/**
> > + * struct flow_dissector_key_pppoe:
> > + * @session_id: pppoe session id
> > + * @ppp_proto: ppp protocol
> > + */
> > +struct flow_dissector_key_pppoe {
> > +	u16 session_id;
> > +	__be16 ppp_proto;
> > +};
> 
> Why isn't session_id __be16 too?

I've got general impression that storing protocols values
in big endian is a standard through out the code and other values like vlan_id
don't have to be stored in big endian, but maybe it's just my illusion :)
I can change that in v3.

> 
> Also, I'm not sure I like mixing the PPPoE session ID and PPP protocol
> fields in the same structure: they're part of two different protocols.
> However, I can't anticipate any technical problem in doing so, and I
> guess there's no easy way to let the flow dissector parse the PPP
> header independently. So well, maybe we don't have choice...

We are not planning to match on other fields from PPP protocol so
separate structure just for it is not needed I guess.

> 
> >  enum flow_dissector_key_id {
> >  	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
> >  	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
> > @@ -291,6 +301,7 @@ enum flow_dissector_key_id {
> >  	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
> >  	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
> >  	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
> > +	FLOW_DISSECTOR_KEY_PPPOE,  /* struct flow_dissector_key_pppoe */
> >
> >  	FLOW_DISSECTOR_KEY_MAX,
> >  };
> > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> > index 6aee04f75e3e..42393af477a2 100644
> > --- a/net/core/flow_dissector.c
> > +++ b/net/core/flow_dissector.c
> > @@ -895,6 +895,39 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
> >  	return result == BPF_OK;
> >  }
> >
> > +/**
> > + * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
> > + * @proto: protocol type (PPP proto field)
> > + */
> > +static bool is_ppp_proto_supported(__be16 proto)
> > +{
> > +	switch (proto) {
> > +	case htons(PPP_AT):
> > +	case htons(PPP_IPX):
> > +	case htons(PPP_VJC_COMP):
> > +	case htons(PPP_VJC_UNCOMP):
> > +	case htons(PPP_MP):
> > +	case htons(PPP_COMPFRAG):
> > +	case htons(PPP_COMP):
> > +	case htons(PPP_MPLS_UC):
> > +	case htons(PPP_MPLS_MC):
> > +	case htons(PPP_IPCP):
> > +	case htons(PPP_ATCP):
> > +	case htons(PPP_IPXCP):
> > +	case htons(PPP_IPV6CP):
> > +	case htons(PPP_CCPFRAG):
> > +	case htons(PPP_MPLSCP):
> > +	case htons(PPP_LCP):
> > +	case htons(PPP_PAP):
> > +	case htons(PPP_LQR):
> > +	case htons(PPP_CHAP):
> > +	case htons(PPP_CBCP):
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}
> > +
> >  /**
> >   * __skb_flow_dissect - extract the flow_keys struct and return it
> >   * @net: associated network namespace, derived from @skb if NULL
> > @@ -1221,19 +1254,29 @@ bool __skb_flow_dissect(const struct net *net,
> >  		}
> >
> >  		nhoff += PPPOE_SES_HLEN;
> > -		switch (hdr->proto) {
> > -		case htons(PPP_IP):
> > +		if (hdr->proto == htons(PPP_IP)) {
> >  			proto = htons(ETH_P_IP);
> >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > -			break;
> > -		case htons(PPP_IPV6):
> > +		} else if (hdr->proto == htons(PPP_IPV6)) {
> >  			proto = htons(ETH_P_IPV6);
> >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > -			break;
> 
> 1)
> Looks like you could easily handle MPLS too. Did you skip it on
> purpose? (not enough users to justify writing and maintaining
> the code?).
> 
> I don't mean MPLS has to be supported; I'd just like to know if it was
> considered.

Yes, exactly as you write, not enough users, but I can see thet MPLS should 
be easy to implement so I'll include it in the next version.

> 
> 2)
> Also this whole test is a bit weak: the version, type and code fields
> must have precise values for the PPPoE Session packet to be valid.
> If either version or type is different than 1, then the packet
> advertises a new version of the protocol that we don't know how to parse
> (or most probably the packet was forged or corrupted). A non-zero code
> is also invalid.
> 
> I know the code was already like this before your patch, but it's
> probably better to fix it before implementing hardware offload.

Sure, I'll add packet validation in next version.

> 
> 3)
> Finally, the PPP protocol could be compressed and stored in 1 byte
> instead of 2. This case wasn't handled before your patch, but I think
> that should be fixed too before implementing hardware offload.

We faced that issue but we couldn't find out what indicates
when ppp protocol is stored in 1 byte instead of 2.

> 
> > -		default:
> > +		} else if (is_ppp_proto_supported(hdr->proto)) {
> > +			fdret = FLOW_DISSECT_RET_OUT_GOOD;
> > +		} else {
> >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> >  			break;
> >  		}
> > +
> > +		if (dissector_uses_key(flow_dissector,
> > +				       FLOW_DISSECTOR_KEY_PPPOE)) {
> > +			struct flow_dissector_key_pppoe *key_pppoe;
> > +
> > +			key_pppoe = skb_flow_dissector_target(flow_dissector,
> > +							      FLOW_DISSECTOR_KEY_PPPOE,
> > +							      target_container);
> > +			key_pppoe->session_id = ntohs(hdr->hdr.sid);
> > +			key_pppoe->ppp_proto = hdr->proto;
> > +		}
> >  		break;
> >  	}
> >  	case htons(ETH_P_TIPC): {
> > --
> > 2.35.1
> >
Guillaume Nault July 1, 2022, 12:41 p.m. UTC | #3
On Fri, Jul 01, 2022 at 10:53:51AM +0000, Drewek, Wojciech wrote:
> > > +/**
> > > + * struct flow_dissector_key_pppoe:
> > > + * @session_id: pppoe session id
> > > + * @ppp_proto: ppp protocol
> > > + */
> > > +struct flow_dissector_key_pppoe {
> > > +	u16 session_id;
> > > +	__be16 ppp_proto;
> > > +};
> > 
> > Why isn't session_id __be16 too?
> 
> I've got general impression that storing protocols values
> in big endian is a standard through out the code and other values like vlan_id
> don't have to be stored in big endian, but maybe it's just my illusion :)
> I can change that in v3.

I don't know of any written rule, but looking at other keys, every
protocol field is stored with the endianess used on the wire. Only
metadata are stored in host byte order. For flow_dissector_key_vlan,
vlan_id is a bit special since it's only 12 bits long, but other vlan
fields are stored in big endian (vlan_tci is __be16 for example). And
vlan ids are special for another reason too: they're also metadata
stored in skbuffs because of vlan hardware offload.

But PPPoE Session Id is clearly read from the packet header, where it's
stored in network byte order.

> > Also, I'm not sure I like mixing the PPPoE session ID and PPP protocol
> > fields in the same structure: they're part of two different protocols.
> > However, I can't anticipate any technical problem in doing so, and I
> > guess there's no easy way to let the flow dissector parse the PPP
> > header independently. So well, maybe we don't have choice...
> 
> We are not planning to match on other fields from PPP protocol so
> separate structure just for it is not needed I guess.

FTR, I believe it's okay to take this shortcut but for different
reasons:

 * When transported over PPPoE, PPP frames are not supposed to have
   address and control fields. Therefore, in this case, the PPP header
   is limitted to the protocol field, so the dissector key would never
   have to be extended.

 * It's unlikely enough that we'd ever have any other protocol
   transporting PPP frames to implement in the flow dissector.
   Therefore, independent PPP dissection code probably won't be needed
   (even if one wants to add support for L2TP or PPTP in the flow
   dissector, that probably should be done with tunnel metadata, like
   VXLAN).

 * We have gotos for jumping to "network" or "transport" header dissection
   (proto_again and ip_proto_again), but no place to restart at the "link"
   header and no way to tell what type of link layer header we're
   requesting to parse (Ethernet or PPP).

For all these reasons, I believe your approach is an acceptable
shortcut. But I don't buy the "let's limit the flow dissector to what
we plan to support in ice" argument.

> > > @@ -1221,19 +1254,29 @@ bool __skb_flow_dissect(const struct net *net,
> > >  		}
> > >
> > >  		nhoff += PPPOE_SES_HLEN;
> > > -		switch (hdr->proto) {
> > > -		case htons(PPP_IP):
> > > +		if (hdr->proto == htons(PPP_IP)) {
> > >  			proto = htons(ETH_P_IP);
> > >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > > -			break;
> > > -		case htons(PPP_IPV6):
> > > +		} else if (hdr->proto == htons(PPP_IPV6)) {
> > >  			proto = htons(ETH_P_IPV6);
> > >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > > -			break;
> > 
> > 1)
> > Looks like you could easily handle MPLS too. Did you skip it on
> > purpose? (not enough users to justify writing and maintaining
> > the code?).
> > 
> > I don't mean MPLS has to be supported; I'd just like to know if it was
> > considered.
> 
> Yes, exactly as you write, not enough users, but I can see thet MPLS should
> be easy to implement so I'll include it in the next version.

Okay.

> > 2)
> > Also this whole test is a bit weak: the version, type and code fields
> > must have precise values for the PPPoE Session packet to be valid.
> > If either version or type is different than 1, then the packet
> > advertises a new version of the protocol that we don't know how to parse
> > (or most probably the packet was forged or corrupted). A non-zero code
> > is also invalid.
> > 
> > I know the code was already like this before your patch, but it's
> > probably better to fix it before implementing hardware offload.
> 
> Sure, I'll add packet validation in next version.

Great!

> > 3)
> > Finally, the PPP protocol could be compressed and stored in 1 byte
> > instead of 2. This case wasn't handled before your patch, but I think
> > that should be fixed too before implementing hardware offload.
> 
> We faced that issue but we couldn't find out what indicates
> when ppp protocol is stored in 1 byte instead of 2.

That depends on the least significant bit of the first byte. If it's 0
then the next byte is also part of the protocol field. If it's one,
the protocol is "compressed" (that is the high order 0x00 byte has been
stripped and we're left with only the least significant byte).

This is explained more formally in RFC 1661 section 2 (PPP Encapsulation):
  https://datatracker.ietf.org/doc/html/rfc1661#section-2

and section 6.5 (Protocol-Field-Compression (PFC)):
  https://datatracker.ietf.org/doc/html/rfc1661#section-6.5

There should be no reason to use this old PPP feature with PPPoE, but
it's still valid (even though it breaks IP header alignment).
Wojciech Drewek July 1, 2022, 1:33 p.m. UTC | #4
> -----Original Message-----
> From: Guillaume Nault <gnault@redhat.com>
> Sent: piątek, 1 lipca 2022 14:42
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Marcin Szycik <marcin.szycik@linux.intel.com>; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> davem@davemloft.net; xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Lobakin,
> Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net; paulus@samba.org
> Subject: Re: [RFC PATCH net-next v3 1/4] flow_dissector: Add PPPoE dissectors
> 
> On Fri, Jul 01, 2022 at 10:53:51AM +0000, Drewek, Wojciech wrote:
> > > > +/**
> > > > + * struct flow_dissector_key_pppoe:
> > > > + * @session_id: pppoe session id
> > > > + * @ppp_proto: ppp protocol
> > > > + */
> > > > +struct flow_dissector_key_pppoe {
> > > > +	u16 session_id;
> > > > +	__be16 ppp_proto;
> > > > +};
> > >
> > > Why isn't session_id __be16 too?
> >
> > I've got general impression that storing protocols values
> > in big endian is a standard through out the code and other values like vlan_id
> > don't have to be stored in big endian, but maybe it's just my illusion :)
> > I can change that in v3.
> 
> I don't know of any written rule, but looking at other keys, every
> protocol field is stored with the endianess used on the wire. Only
> metadata are stored in host byte order. For flow_dissector_key_vlan,
> vlan_id is a bit special since it's only 12 bits long, but other vlan
> fields are stored in big endian (vlan_tci is __be16 for example). And
> vlan ids are special for another reason too: they're also metadata
> stored in skbuffs because of vlan hardware offload.
> 
> But PPPoE Session Id is clearly read from the packet header, where it's
> stored in network byte order.

Thanks for explanation! We'll use __be16 for session_id since now.

> 
> > > Also, I'm not sure I like mixing the PPPoE session ID and PPP protocol
> > > fields in the same structure: they're part of two different protocols.
> > > However, I can't anticipate any technical problem in doing so, and I
> > > guess there's no easy way to let the flow dissector parse the PPP
> > > header independently. So well, maybe we don't have choice...
> >
> > We are not planning to match on other fields from PPP protocol so
> > separate structure just for it is not needed I guess.
> 
> FTR, I believe it's okay to take this shortcut but for different
> reasons:
> 
>  * When transported over PPPoE, PPP frames are not supposed to have
>    address and control fields. Therefore, in this case, the PPP header
>    is limitted to the protocol field, so the dissector key would never
>    have to be extended.
> 
>  * It's unlikely enough that we'd ever have any other protocol
>    transporting PPP frames to implement in the flow dissector.
>    Therefore, independent PPP dissection code probably won't be needed
>    (even if one wants to add support for L2TP or PPTP in the flow
>    dissector, that probably should be done with tunnel metadata, like
>    VXLAN).
> 
>  * We have gotos for jumping to "network" or "transport" header dissection
>    (proto_again and ip_proto_again), but no place to restart at the "link"
>    header and no way to tell what type of link layer header we're
>    requesting to parse (Ethernet or PPP).
> 
> For all these reasons, I believe your approach is an acceptable
> shortcut. But I don't buy the "let's limit the flow dissector to what
> we plan to support in ice" argument.

Again thanks for explanation. Sorry, I didn't want to suggest that flow_dissector
should be designed based only on our needs. We are happy to change our
implementation if requested.

We will stay with the current approach if this is the conclusion.

> 
> > > > @@ -1221,19 +1254,29 @@ bool __skb_flow_dissect(const struct net *net,
> > > >  		}
> > > >
> > > >  		nhoff += PPPOE_SES_HLEN;
> > > > -		switch (hdr->proto) {
> > > > -		case htons(PPP_IP):
> > > > +		if (hdr->proto == htons(PPP_IP)) {
> > > >  			proto = htons(ETH_P_IP);
> > > >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > > > -			break;
> > > > -		case htons(PPP_IPV6):
> > > > +		} else if (hdr->proto == htons(PPP_IPV6)) {
> > > >  			proto = htons(ETH_P_IPV6);
> > > >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > > > -			break;
> > >
> > > 1)
> > > Looks like you could easily handle MPLS too. Did you skip it on
> > > purpose? (not enough users to justify writing and maintaining
> > > the code?).
> > >
> > > I don't mean MPLS has to be supported; I'd just like to know if it was
> > > considered.
> >
> > Yes, exactly as you write, not enough users, but I can see thet MPLS should
> > be easy to implement so I'll include it in the next version.
> 
> Okay.
> 
> > > 2)
> > > Also this whole test is a bit weak: the version, type and code fields
> > > must have precise values for the PPPoE Session packet to be valid.
> > > If either version or type is different than 1, then the packet
> > > advertises a new version of the protocol that we don't know how to parse
> > > (or most probably the packet was forged or corrupted). A non-zero code
> > > is also invalid.
> > >
> > > I know the code was already like this before your patch, but it's
> > > probably better to fix it before implementing hardware offload.
> >
> > Sure, I'll add packet validation in next version.
> 
> Great!
> 
> > > 3)
> > > Finally, the PPP protocol could be compressed and stored in 1 byte
> > > instead of 2. This case wasn't handled before your patch, but I think
> > > that should be fixed too before implementing hardware offload.
> >
> > We faced that issue but we couldn't find out what indicates
> > when ppp protocol is stored in 1 byte instead of 2.
> 
> That depends on the least significant bit of the first byte. If it's 0
> then the next byte is also part of the protocol field. If it's one,
> the protocol is "compressed" (that is the high order 0x00 byte has been
> stripped and we're left with only the least significant byte).
> 
> This is explained more formally in RFC 1661 section 2 (PPP Encapsulation):
>   https://datatracker.ietf.org/doc/html/rfc1661#section-2
> 
> and section 6.5 (Protocol-Field-Compression (PFC)):
>   https://datatracker.ietf.org/doc/html/rfc1661#section-6.5
> 
> There should be no reason to use this old PPP feature with PPPoE, but
> it's still valid (even though it breaks IP header alignment).

Thanks for explanation! From the next version we will support both options.
diff mbox series

Patch

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index a4c6057c7097..8ff40c7c3f1c 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -261,6 +261,16 @@  struct flow_dissector_key_num_of_vlans {
 	u8 num_of_vlans;
 };
 
+/**
+ * struct flow_dissector_key_pppoe:
+ * @session_id: pppoe session id
+ * @ppp_proto: ppp protocol
+ */
+struct flow_dissector_key_pppoe {
+	u16 session_id;
+	__be16 ppp_proto;
+};
+
 enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
 	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -291,6 +301,7 @@  enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
 	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
 	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
+	FLOW_DISSECTOR_KEY_PPPOE,  /* struct flow_dissector_key_pppoe */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6aee04f75e3e..42393af477a2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -895,6 +895,39 @@  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 	return result == BPF_OK;
 }
 
+/**
+ * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
+ * @proto: protocol type (PPP proto field)
+ */
+static bool is_ppp_proto_supported(__be16 proto)
+{
+	switch (proto) {
+	case htons(PPP_AT):
+	case htons(PPP_IPX):
+	case htons(PPP_VJC_COMP):
+	case htons(PPP_VJC_UNCOMP):
+	case htons(PPP_MP):
+	case htons(PPP_COMPFRAG):
+	case htons(PPP_COMP):
+	case htons(PPP_MPLS_UC):
+	case htons(PPP_MPLS_MC):
+	case htons(PPP_IPCP):
+	case htons(PPP_ATCP):
+	case htons(PPP_IPXCP):
+	case htons(PPP_IPV6CP):
+	case htons(PPP_CCPFRAG):
+	case htons(PPP_MPLSCP):
+	case htons(PPP_LCP):
+	case htons(PPP_PAP):
+	case htons(PPP_LQR):
+	case htons(PPP_CHAP):
+	case htons(PPP_CBCP):
+		return true;
+	default:
+		return false;
+	}
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @net: associated network namespace, derived from @skb if NULL
@@ -1221,19 +1254,29 @@  bool __skb_flow_dissect(const struct net *net,
 		}
 
 		nhoff += PPPOE_SES_HLEN;
-		switch (hdr->proto) {
-		case htons(PPP_IP):
+		if (hdr->proto == htons(PPP_IP)) {
 			proto = htons(ETH_P_IP);
 			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-			break;
-		case htons(PPP_IPV6):
+		} else if (hdr->proto == htons(PPP_IPV6)) {
 			proto = htons(ETH_P_IPV6);
 			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-			break;
-		default:
+		} else if (is_ppp_proto_supported(hdr->proto)) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		} else {
 			fdret = FLOW_DISSECT_RET_OUT_BAD;
 			break;
 		}
+
+		if (dissector_uses_key(flow_dissector,
+				       FLOW_DISSECTOR_KEY_PPPOE)) {
+			struct flow_dissector_key_pppoe *key_pppoe;
+
+			key_pppoe = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_PPPOE,
+							      target_container);
+			key_pppoe->session_id = ntohs(hdr->hdr.sid);
+			key_pppoe->ppp_proto = hdr->proto;
+		}
 		break;
 	}
 	case htons(ETH_P_TIPC): {