diff mbox series

[bpf-next,v2,09/20] xdp: Add VLAN tag hint

Message ID 20230703181226.19380-10-larysa.zaremba@intel.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series XDP metadata via kfuncs for ice | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 4172 this patch: 4173
netdev/cc_maintainers warning 6 maintainers not CCed: hawk@kernel.org corbet@lwn.net davem@davemloft.net pabeni@redhat.com edumazet@google.com linux-doc@vger.kernel.org
netdev/build_clang fail Errors and warnings before: 897 this patch: 897
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4389 this patch: 4390
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Larysa Zaremba July 3, 2023, 6:12 p.m. UTC
Implement functionality that enables drivers to expose VLAN tag
to XDP code.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
 include/linux/netdevice.h                    |  2 ++
 include/net/xdp.h                            |  2 ++
 kernel/bpf/offload.c                         |  2 ++
 net/core/xdp.c                               | 20 ++++++++++++++++++++
 5 files changed, 33 insertions(+), 1 deletion(-)

Comments

John Fastabend July 3, 2023, 8:15 p.m. UTC | #1
Larysa Zaremba wrote:
> Implement functionality that enables drivers to expose VLAN tag
> to XDP code.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
>  Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
>  include/linux/netdevice.h                    |  2 ++
>  include/net/xdp.h                            |  2 ++
>  kernel/bpf/offload.c                         |  2 ++
>  net/core/xdp.c                               | 20 ++++++++++++++++++++
>  5 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> index 25ce72af81c2..ea6dd79a21d3 100644
> --- a/Documentation/networking/xdp-rx-metadata.rst
> +++ b/Documentation/networking/xdp-rx-metadata.rst
> @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
>  metadata is supported, this set will grow:
>  
>  .. kernel-doc:: net/core/xdp.c
> -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> +   :identifiers: bpf_xdp_metadata_rx_timestamp
> +
> +.. kernel-doc:: net/core/xdp.c
> +   :identifiers: bpf_xdp_metadata_rx_hash
> +
> +.. kernel-doc:: net/core/xdp.c
> +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
>  
>  An XDP program can use these kfuncs to read the metadata into stack
>  variables for its own consumption. Or, to pass the metadata on to other
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index b828c7a75be2..4fa4380e6d89 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1658,6 +1658,8 @@ struct xdp_metadata_ops {
>  	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
>  	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
>  			       enum xdp_rss_hash_type *rss_type);
> +	int	(*xmo_rx_vlan_tag)(const struct xdp_md *ctx, u16 *vlan_tag,
> +				   __be16 *vlan_proto);
>  };
>  
>  /**
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 6381560efae2..89c58f56ffc6 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -389,6 +389,8 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
>  			   bpf_xdp_metadata_rx_timestamp) \
>  	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
>  			   bpf_xdp_metadata_rx_hash) \
> +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
> +			   bpf_xdp_metadata_rx_vlan_tag) \
>  
>  enum {
>  #define XDP_METADATA_KFUNC(name, _) name,
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 8a26cd8814c1..986e7becfd42 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -848,6 +848,8 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
>  		p = ops->xmo_rx_timestamp;
>  	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
>  		p = ops->xmo_rx_hash;
> +	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_VLAN_TAG))
> +		p = ops->xmo_rx_vlan_tag;
>  out:
>  	up_read(&bpf_devs_lock);
>  
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 41e5ca8643ec..f6262c90e45f 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>  	return -EOPNOTSUPP;
>  }
>  
> +/**
> + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
> + * @ctx: XDP context pointer.
> + * @vlan_tag: Destination pointer for VLAN tag
> + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
> + *
> + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
> + * containing VLAN ID, vlan_proto contains protocol identifier.

Above is a bit confusing to me at least.

The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
are to be included here? The VlanID or the full 16bit TCI meaning the
PCP+DEI+VID? I think by "including 12 least significant bytes" you
mean bits, but also not clear about those 4 other bits.

I can likely figure it out in next patches from implementation but
would be nice to clean up docs.

> + *
> + * Return:
> + * * Returns 0 on success or ``-errno`` on error.
> + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
> + * * ``-ENODATA``    : VLAN tag was not stripped or is not available
> + */
> +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag,
> +					     __be16 *vlan_proto)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>  __diag_pop();
>  
>  BTF_SET8_START(xdp_metadata_kfunc_ids)
> -- 
> 2.41.0
>
Larysa Zaremba July 4, 2023, 8:23 a.m. UTC | #2
On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
> Larysa Zaremba wrote:
> > Implement functionality that enables drivers to expose VLAN tag
> > to XDP code.
> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> >  Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
> >  include/linux/netdevice.h                    |  2 ++
> >  include/net/xdp.h                            |  2 ++
> >  kernel/bpf/offload.c                         |  2 ++
> >  net/core/xdp.c                               | 20 ++++++++++++++++++++
> >  5 files changed, 33 insertions(+), 1 deletion(-)
> > 
> > diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> > index 25ce72af81c2..ea6dd79a21d3 100644
> > --- a/Documentation/networking/xdp-rx-metadata.rst
> > +++ b/Documentation/networking/xdp-rx-metadata.rst
> > @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
> >  metadata is supported, this set will grow:
> >  
> >  .. kernel-doc:: net/core/xdp.c
> > -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> > +   :identifiers: bpf_xdp_metadata_rx_timestamp
> > +
> > +.. kernel-doc:: net/core/xdp.c
> > +   :identifiers: bpf_xdp_metadata_rx_hash
> > +
> > +.. kernel-doc:: net/core/xdp.c
> > +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
> >  
> >  An XDP program can use these kfuncs to read the metadata into stack
> >  variables for its own consumption. Or, to pass the metadata on to other
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index b828c7a75be2..4fa4380e6d89 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1658,6 +1658,8 @@ struct xdp_metadata_ops {
> >  	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
> >  	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
> >  			       enum xdp_rss_hash_type *rss_type);
> > +	int	(*xmo_rx_vlan_tag)(const struct xdp_md *ctx, u16 *vlan_tag,
> > +				   __be16 *vlan_proto);
> >  };
> >  
> >  /**
> > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > index 6381560efae2..89c58f56ffc6 100644
> > --- a/include/net/xdp.h
> > +++ b/include/net/xdp.h
> > @@ -389,6 +389,8 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
> >  			   bpf_xdp_metadata_rx_timestamp) \
> >  	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
> >  			   bpf_xdp_metadata_rx_hash) \
> > +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
> > +			   bpf_xdp_metadata_rx_vlan_tag) \
> >  
> >  enum {
> >  #define XDP_METADATA_KFUNC(name, _) name,
> > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> > index 8a26cd8814c1..986e7becfd42 100644
> > --- a/kernel/bpf/offload.c
> > +++ b/kernel/bpf/offload.c
> > @@ -848,6 +848,8 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
> >  		p = ops->xmo_rx_timestamp;
> >  	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
> >  		p = ops->xmo_rx_hash;
> > +	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_VLAN_TAG))
> > +		p = ops->xmo_rx_vlan_tag;
> >  out:
> >  	up_read(&bpf_devs_lock);
> >  
> > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > index 41e5ca8643ec..f6262c90e45f 100644
> > --- a/net/core/xdp.c
> > +++ b/net/core/xdp.c
> > @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> >  	return -EOPNOTSUPP;
> >  }
> >  
> > +/**
> > + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
> > + * @ctx: XDP context pointer.
> > + * @vlan_tag: Destination pointer for VLAN tag
> > + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
> > + *
> > + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
> > + * containing VLAN ID, vlan_proto contains protocol identifier.
> 
> Above is a bit confusing to me at least.
> 
> The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
> are to be included here? The VlanID or the full 16bit TCI meaning the
> PCP+DEI+VID?

It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to 
xdp_hw_metadata") this is more clear, because the tag is parsed.

What about rephrasing it this way:

In case of success, vlan_proto contains VLAN protocol identifier (TPID), 
vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).

> I think by "including 12 least significant bytes" you
> mean bits,

Yes, my bad.

> but also not clear about those 4 other bits.
> 
> I can likely figure it out in next patches from implementation but
> would be nice to clean up docs.
> 
> > + *
> > + * Return:
> > + * * Returns 0 on success or ``-errno`` on error.
> > + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
> > + * * ``-ENODATA``    : VLAN tag was not stripped or is not available
> > + */
> > +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag,
> > +					     __be16 *vlan_proto)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> >  __diag_pop();
> >  
> >  BTF_SET8_START(xdp_metadata_kfunc_ids)
> > -- 
> > 2.41.0
> > 
> 
> 
>
Jesper Dangaard Brouer July 4, 2023, 10:23 a.m. UTC | #3
On 04/07/2023 10.23, Larysa Zaremba wrote:
> On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
>> Larysa Zaremba wrote:
>>> Implement functionality that enables drivers to expose VLAN tag
>>> to XDP code.
>>>
>>> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
>>> ---
>>>   Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
>>>   include/linux/netdevice.h                    |  2 ++
>>>   include/net/xdp.h                            |  2 ++
>>>   kernel/bpf/offload.c                         |  2 ++
>>>   net/core/xdp.c                               | 20 ++++++++++++++++++++
>>>   5 files changed, 33 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
>>> index 25ce72af81c2..ea6dd79a21d3 100644
>>> --- a/Documentation/networking/xdp-rx-metadata.rst
>>> +++ b/Documentation/networking/xdp-rx-metadata.rst
>>> @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
>>>   metadata is supported, this set will grow:
>>>   
>>>   .. kernel-doc:: net/core/xdp.c
>>> -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
>>> +   :identifiers: bpf_xdp_metadata_rx_timestamp
>>> +
>>> +.. kernel-doc:: net/core/xdp.c
>>> +   :identifiers: bpf_xdp_metadata_rx_hash
>>> +
>>> +.. kernel-doc:: net/core/xdp.c
>>> +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
>>>   
>>>   An XDP program can use these kfuncs to read the metadata into stack
>>>   variables for its own consumption. Or, to pass the metadata on to other
[...]
>>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>>> index 41e5ca8643ec..f6262c90e45f 100644
>>> --- a/net/core/xdp.c
>>> +++ b/net/core/xdp.c
>>> @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>>>   	return -EOPNOTSUPP;
>>>   }
>>>   
>>> +/**
>>> + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
>>> + * @ctx: XDP context pointer.
>>> + * @vlan_tag: Destination pointer for VLAN tag
>>> + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
>>> + *
>>> + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
>>> + * containing VLAN ID, vlan_proto contains protocol identifier.
>>
>> Above is a bit confusing to me at least.
>>
>> The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
>> are to be included here? The VlanID or the full 16bit TCI meaning the
>> PCP+DEI+VID?
> 
> It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
> xdp_hw_metadata") this is more clear, because the tag is parsed.
> 

Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
Protocol IDentifier)?
I mean, it can basically only have two values[1], and we just wanted to
know if it is a VLAN (that hardware offloaded/removed for us):

  static __always_inline int proto_is_vlan(__u16 h_proto)
  {
	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
		  h_proto == bpf_htons(ETH_P_8021AD));
  }

[1] 
https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79

Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
(in file include/uapi/linux/if_ether.h)
Is this actually in use?
Maybe some hardware can "VLAN" offload this?


> What about rephrasing it this way:
> 
> In case of success, vlan_proto contains VLAN protocol identifier (TPID),
> vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
> 

Hmm, I think we can improve this further. This text becomes part of the
documentation for end-users (target audience).  Thus, I think it is
worth being more verbose and even mention the existing defines that we
are expecting end-users to take advantage of.

What about:

In case of success, the VLAN EtherType, also known as the TPID (Tag
Protocol IDentifier), is stored in vlan_proto (usually either
ETH_P_8021Q or ETH_P_8021AD). The VLAN tag is stored in vlan_tag, which
is a 16-bit field containing sub-fields (PCP+DEI+VID). The VLAN ID (VID)
is 12 bits, commonly extracted using the mask VLAN_VID_MASK (0x0fff).
For the meaning of the sub-fields Priority Code Point (PCP) and Drop
Eligible Indicator (DEI) (formerly CFI), please refer to other
documentation. Remember these 16-bit fields are stored in network byte
order. Thus, transformation with byte-order helper functions like
bpf_ntohs() is needed.



>> I think by "including 12 least significant bytes" you
>> mean bits,
> 
> Yes, my bad.
> 
>> but also not clear about those 4 other bits.
>>
>> I can likely figure it out in next patches from implementation but
>> would be nice to clean up docs.
>>
>>> + *
>>> + * Return:
>>> + * * Returns 0 on success or ``-errno`` on error.
>>> + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
>>> + * * ``-ENODATA``    : VLAN tag was not stripped or is not available
>>> + */
>>> +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag,
>>> +					     __be16 *vlan_proto)
>>> +{
>>> +	return -EOPNOTSUPP;
>>> +}
>>> +
Larysa Zaremba July 4, 2023, 11:02 a.m. UTC | #4
On Tue, Jul 04, 2023 at 12:23:45PM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 04/07/2023 10.23, Larysa Zaremba wrote:
> > On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
> > > Larysa Zaremba wrote:
> > > > Implement functionality that enables drivers to expose VLAN tag
> > > > to XDP code.
> > > > 
> > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > ---
> > > >   Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
> > > >   include/linux/netdevice.h                    |  2 ++
> > > >   include/net/xdp.h                            |  2 ++
> > > >   kernel/bpf/offload.c                         |  2 ++
> > > >   net/core/xdp.c                               | 20 ++++++++++++++++++++
> > > >   5 files changed, 33 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> > > > index 25ce72af81c2..ea6dd79a21d3 100644
> > > > --- a/Documentation/networking/xdp-rx-metadata.rst
> > > > +++ b/Documentation/networking/xdp-rx-metadata.rst
> > > > @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
> > > >   metadata is supported, this set will grow:
> > > >   .. kernel-doc:: net/core/xdp.c
> > > > -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> > > > +   :identifiers: bpf_xdp_metadata_rx_timestamp
> > > > +
> > > > +.. kernel-doc:: net/core/xdp.c
> > > > +   :identifiers: bpf_xdp_metadata_rx_hash
> > > > +
> > > > +.. kernel-doc:: net/core/xdp.c
> > > > +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
> > > >   An XDP program can use these kfuncs to read the metadata into stack
> > > >   variables for its own consumption. Or, to pass the metadata on to other
> [...]
> > > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > > index 41e5ca8643ec..f6262c90e45f 100644
> > > > --- a/net/core/xdp.c
> > > > +++ b/net/core/xdp.c
> > > > @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> > > >   	return -EOPNOTSUPP;
> > > >   }
> > > > +/**
> > > > + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
> > > > + * @ctx: XDP context pointer.
> > > > + * @vlan_tag: Destination pointer for VLAN tag
> > > > + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
> > > > + *
> > > > + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
> > > > + * containing VLAN ID, vlan_proto contains protocol identifier.
> > > 
> > > Above is a bit confusing to me at least.
> > > 
> > > The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
> > > are to be included here? The VlanID or the full 16bit TCI meaning the
> > > PCP+DEI+VID?
> > 
> > It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
> > xdp_hw_metadata") this is more clear, because the tag is parsed.
> > 
> 
> Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
> Protocol IDentifier)?
> I mean, it can basically only have two values[1], and we just wanted to
> know if it is a VLAN (that hardware offloaded/removed for us):

If we assume everyone follows the standard, this would be correct.
But apparently, some applications use some ambiguous value as a TPID [0].

So it is not hard to imagine that some NICs could allow you to configure a
custom TPID. I am not sure if any in-tree drivers actually do this, but I think
it's nice to provide some flexibility at the XDP level, especially considering
that the network stack stores the full vlan_proto.

[0] 
https://techhub.hpe.com/eginfolib/networking/docs/switches/7500/5200-1938a_l2-lan_cg/content/495503472.htm

> 
>  static __always_inline int proto_is_vlan(__u16 h_proto)
>  {
> 	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
> 		  h_proto == bpf_htons(ETH_P_8021AD));
>  }
> 
> [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79
> 
> Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
> (in file include/uapi/linux/if_ether.h)
> Is this actually in use?
> Maybe some hardware can "VLAN" offload this?
> 
> 
> > What about rephrasing it this way:
> > 
> > In case of success, vlan_proto contains VLAN protocol identifier (TPID),
> > vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
> > 
> 
> Hmm, I think we can improve this further. This text becomes part of the
> documentation for end-users (target audience).  Thus, I think it is
> worth being more verbose and even mention the existing defines that we
> are expecting end-users to take advantage of.
> 
> What about:
> 
> In case of success. The VLAN EtherType is stored in vlan_proto (usually
> either ETH_P_8021Q or ETH_P_8021AD) also known as TPID (Tag Protocol
> IDentifier). The VLAN tag is stored in vlan_tag, which is a 16-bit field
> containing sub-fields (PCP+DEI+VID). The VLAN ID (VID) is 12-bits
> commonly extracted using mask VLAN_VID_MASK (0x0fff).  For the meaning
> of the sub-fields Priority Code Point (PCP) and Drop Eligible Indicator
> (DEI) (formerly CFI) please reference other documentation. Remember
> these 16-bit fields are stored in network-byte. Thus, transformation
> with byte-order helper functions like bpf_ntohs() are needed.
> 

AFAIK, vlan_tag is stored in host byte order, this is how it is in skb.
In ice, we receive VLAN tag in descriptor already in LE.
Only protocol is BE (network byte order). So I would replace the last 2 
sentences with the following:

vlan_tag is stored in host byte order, so no byte order conversion is needed.
vlan_proto is stored in network byte order, the suggested way to use this value:

vlan_proto == bpf_htons(ETH_P_8021Q)

> 
> 
> > > I think by "including 12 least significant bytes" you
> > > mean bits,
> > 
> > Yes, my bad.
> > 
> > > but also not clear about those 4 other bits.
> > > 
> > > I can likely figure it out in next patches from implementation but
> > > would be nice to clean up docs.
> > > 
> > > > + *
> > > > + * Return:
> > > > + * * Returns 0 on success or ``-errno`` on error.
> > > > + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
> > > > + * * ``-ENODATA``    : VLAN tag was not stripped or is not available
> > > > + */
> > > > +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag,
> > > > +					     __be16 *vlan_proto)
> > > > +{
> > > > +	return -EOPNOTSUPP;
> > > > +}
> > > > +
> 
>
Jesper Dangaard Brouer July 4, 2023, 2:18 p.m. UTC | #5
On 04/07/2023 13.02, Larysa Zaremba wrote:
> On Tue, Jul 04, 2023 at 12:23:45PM +0200, Jesper Dangaard Brouer wrote:
>>
>> On 04/07/2023 10.23, Larysa Zaremba wrote:
>>> On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
>>>> Larysa Zaremba wrote:
>>>>> Implement functionality that enables drivers to expose VLAN tag
>>>>> to XDP code.
>>>>>
>>>>> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
>>>>> ---
>>>>>    Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
>>>>>    include/linux/netdevice.h                    |  2 ++
>>>>>    include/net/xdp.h                            |  2 ++
>>>>>    kernel/bpf/offload.c                         |  2 ++
>>>>>    net/core/xdp.c                               | 20 ++++++++++++++++++++
>>>>>    5 files changed, 33 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
>>>>> index 25ce72af81c2..ea6dd79a21d3 100644
>>>>> --- a/Documentation/networking/xdp-rx-metadata.rst
>>>>> +++ b/Documentation/networking/xdp-rx-metadata.rst
>>>>> @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
>>>>>    metadata is supported, this set will grow:
>>>>>    .. kernel-doc:: net/core/xdp.c
>>>>> -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
>>>>> +   :identifiers: bpf_xdp_metadata_rx_timestamp
>>>>> +
>>>>> +.. kernel-doc:: net/core/xdp.c
>>>>> +   :identifiers: bpf_xdp_metadata_rx_hash
>>>>> +
>>>>> +.. kernel-doc:: net/core/xdp.c
>>>>> +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
>>>>>    An XDP program can use these kfuncs to read the metadata into stack
>>>>>    variables for its own consumption. Or, to pass the metadata on to other
>> [...]
>>>>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>>>>> index 41e5ca8643ec..f6262c90e45f 100644
>>>>> --- a/net/core/xdp.c
>>>>> +++ b/net/core/xdp.c
>>>>> @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>>>>>    	return -EOPNOTSUPP;
>>>>>    }
>>>>> +/**
>>>>> + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
>>>>> + * @ctx: XDP context pointer.
>>>>> + * @vlan_tag: Destination pointer for VLAN tag
>>>>> + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
>>>>> + *
>>>>> + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
>>>>> + * containing VLAN ID, vlan_proto contains protocol identifier.
>>>>
>>>> Above is a bit confusing to me at least.
>>>>
>>>> The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
>>>> are to be included here? The VlanID or the full 16bit TCI meaning the
>>>> PCP+DEI+VID?
>>>
>>> It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
>>> xdp_hw_metadata") this is more clear, because the tag is parsed.
>>>
>>
>> Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
>> Protocol IDentifier)?
>> I mean, it can basically only have two values[1], and we just wanted to
>> know if it is a VLAN (that hardware offloaded/removed for us):
> 
> If we assume everyone follows the standard, this would be correct.
> But apparently, some applications use some ambiguous value as a TPID [0].
> 
> So it is not hard to imagine, some NICs could allow you to configure your
> custom TPID. I am not sure if any in-tree drivers actually do this, but I think
> it's nice to provide some flexibility on XDP level, especially considering
> network stack stores full vlan_proto.
>

I'm buying your argument, and agree it makes sense to provide TPID in
the call signature, given that weird hardware exists that allows people
to configure a custom TPID.

Looking through kernel defines (in uapi/linux/if_ether.h) I see evidence
that funky QinQ EtherTypes have been used in the past:

  #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
  #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
  #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */


> [0]
> https://techhub.hpe.com/eginfolib/networking/docs/switches/7500/5200-1938a_l2-lan_cg/content/495503472.htm
> 
>>
>>   static __always_inline int proto_is_vlan(__u16 h_proto)
>>   {
>> 	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
>> 		  h_proto == bpf_htons(ETH_P_8021AD));
>>   }
>>
>> [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79
>>
>> Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
>> (in file include/uapi/linux/if_ether.h)
>> Is this actually in use?
>> Maybe some hardware can "VLAN" offload this?
>>
>>
>>> What about rephrasing it this way:
>>>
>>> In case of success, vlan_proto contains VLAN protocol identifier (TPID),
>>> vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
>>>
>>
>> Hmm, I think we can improve this further. This text becomes part of the
>> documentation for end-users (target audience).  Thus, I think it is
>> worth being more verbose and even mention the existing defines that we
>> are expecting end-users to take advantage of.
>>
>> What about:
>>
>> In case of success. The VLAN EtherType is stored in vlan_proto (usually
>> either ETH_P_8021Q or ETH_P_8021AD) also known as TPID (Tag Protocol
>> IDentifier). The VLAN tag is stored in vlan_tag, which is a 16-bit field
>> containing sub-fields (PCP+DEI+VID). The VLAN ID (VID) is 12-bits
>> commonly extracted using mask VLAN_VID_MASK (0x0fff).  For the meaning
>> of the sub-fields Priority Code Point (PCP) and Drop Eligible Indicator
>> (DEI) (formerly CFI) please reference other documentation. Remember
>> these 16-bit fields are stored in network-byte. Thus, transformation
>> with byte-order helper functions like bpf_ntohs() are needed.
>>
> 
> AFAIK, vlan_tag is stored in host byte order, this is how it is in skb.

I'm not sure we should follow SKB storage scheme for XDP.

> In ice, we receive VLAN tag in descriptor already in LE.
> Only protocol is BE (network byte order). So I would replace the last 2
> sentences with the following:
> 
> vlan_tag is stored in host byte order, so no byte order conversion is needed.

Yikes, that was unexpected.  This needs to be heavily documented in docs.

When parsing packets, it is in network-byte-order, else my code is wrong 
here[1]:

   [1] 
https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L122

I'm accessing the skb->vlan_tci here [2], and I notice I don't do any
byte-order conversions, so fortunately I didn't make a code mistake.

   [2] 
https://github.com/xdp-project/bpf-examples/blob/master/traffic-pacing-edt/edt_pacer_vlan.c#L215

> vlan_proto is stored in network byte order, the suggested way to use this value:
> 
> vlan_proto == bpf_htons(ETH_P_8021Q)
> 
>>
>>

--Jesper
Larysa Zaremba July 6, 2023, 2:46 p.m. UTC | #6
On Tue, Jul 04, 2023 at 04:18:04PM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 04/07/2023 13.02, Larysa Zaremba wrote:
> > On Tue, Jul 04, 2023 at 12:23:45PM +0200, Jesper Dangaard Brouer wrote:
> > > 
> > > On 04/07/2023 10.23, Larysa Zaremba wrote:
> > > > On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
> > > > > Larysa Zaremba wrote:
> > > > > > Implement functionality that enables drivers to expose VLAN tag
> > > > > > to XDP code.
> > > > > > 
> > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > ---
> > > > > >    Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
> > > > > >    include/linux/netdevice.h                    |  2 ++
> > > > > >    include/net/xdp.h                            |  2 ++
> > > > > >    kernel/bpf/offload.c                         |  2 ++
> > > > > >    net/core/xdp.c                               | 20 ++++++++++++++++++++
> > > > > >    5 files changed, 33 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> > > > > > index 25ce72af81c2..ea6dd79a21d3 100644
> > > > > > --- a/Documentation/networking/xdp-rx-metadata.rst
> > > > > > +++ b/Documentation/networking/xdp-rx-metadata.rst
> > > > > > @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
> > > > > >    metadata is supported, this set will grow:
> > > > > >    .. kernel-doc:: net/core/xdp.c
> > > > > > -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> > > > > > +   :identifiers: bpf_xdp_metadata_rx_timestamp
> > > > > > +
> > > > > > +.. kernel-doc:: net/core/xdp.c
> > > > > > +   :identifiers: bpf_xdp_metadata_rx_hash
> > > > > > +
> > > > > > +.. kernel-doc:: net/core/xdp.c
> > > > > > +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
> > > > > >    An XDP program can use these kfuncs to read the metadata into stack
> > > > > >    variables for its own consumption. Or, to pass the metadata on to other
> > > [...]
> > > > > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > > > > index 41e5ca8643ec..f6262c90e45f 100644
> > > > > > --- a/net/core/xdp.c
> > > > > > +++ b/net/core/xdp.c
> > > > > > @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> > > > > >    	return -EOPNOTSUPP;
> > > > > >    }
> > > > > > +/**
> > > > > > + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
> > > > > > + * @ctx: XDP context pointer.
> > > > > > + * @vlan_tag: Destination pointer for VLAN tag
> > > > > > + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
> > > > > > + *
> > > > > > + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
> > > > > > + * containing VLAN ID, vlan_proto contains protocol identifier.
> > > > > 
> > > > > Above is a bit confusing to me at least.
> > > > > 
> > > > > The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
> > > > > are to be included here? The VlanID or the full 16bit TCI meaning the
> > > > > PCP+DEI+VID?
> > > > 
> > > > It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
> > > > xdp_hw_metadata") this is more clear, because the tag is parsed.
> > > > 
> > > 
> > > Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
> > > Protocol IDentifier)?
> > > I mean, it can basically only have two values[1], and we just wanted to
> > > know if it is a VLAN (that hardware offloaded/removed for us):
> > 
> > If we assume everyone follows the standard, this would be correct.
> > But apparently, some applications use some ambiguous value as a TPID [0].
> > 
> > So it is not hard to imagine, some NICs could alllow you to configure your
> > custom TPID. I am not sure if any in-tree drivers actually do this, but I think
> > it's nice to provide some flexibility on XDP level, especially considering
> > network stack stores full vlan_proto.
> > 
> 
> I'm buying your argument, and agree it makes sense to provide TPID in
> the call signature.  Given weird hardware exists that allow people to
> configure custom TPID.
> 
> Looking through kernel defines (in uapi/linux/if_ether.h) I see evidence
> that funky QinQ EtherTypes have been used in the past:
> 
>  #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> REGISTERED ID ] */
>  #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> REGISTERED ID ] */
>  #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> REGISTERED ID ] */
> 
> 
> > [0]
> > https://techhub.hpe.com/eginfolib/networking/docs/switches/7500/5200-1938a_l2-lan_cg/content/495503472.htm
> > 
> > > 
> > >   static __always_inline int proto_is_vlan(__u16 h_proto)
> > >   {
> > > 	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
> > > 		  h_proto == bpf_htons(ETH_P_8021AD));
> > >   }
> > > 
> > > [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79
> > > 
> > > Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
> > > (in file include/uapi/linux/if_ether.h)
> > > Is this actually in use?
> > > Maybe some hardware can "VLAN" offload this?
> > > 
> > > 
> > > > What about rephrasing it this way:
> > > > 
> > > > In case of success, vlan_proto contains VLAN protocol identifier (TPID),
> > > > vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
> > > > 
> > > 
> > > Hmm, I think we can improve this further. This text becomes part of the
> > > documentation for end-users (target audience).  Thus, I think it is
> > > worth being more verbose and even mention the existing defines that we
> > > are expecting end-users to take advantage of.
> > > 
> > > What about:
> > > 
> > > In case of success. The VLAN EtherType is stored in vlan_proto (usually
> > > either ETH_P_8021Q or ETH_P_8021AD) also known as TPID (Tag Protocol
> > > IDentifier). The VLAN tag is stored in vlan_tag, which is a 16-bit field
> > > containing sub-fields (PCP+DEI+VID). The VLAN ID (VID) is 12-bits
> > > commonly extracted using mask VLAN_VID_MASK (0x0fff).  For the meaning
> > > of the sub-fields Priority Code Point (PCP) and Drop Eligible Indicator
> > > (DEI) (formerly CFI) please reference other documentation. Remember
> > > these 16-bit fields are stored in network-byte. Thus, transformation
> > > with byte-order helper functions like bpf_ntohs() are needed.
> > > 
> > 
> > AFAIK, vlan_tag is stored in host byte order, this is how it is in skb.
> 
> I'm not sure we should follow SKB storage scheme for XDP.
>

I think following the SKB convention is a good idea in this particular case. As I
have mentioned below, in ice the VLAN TCI in the descriptor already comes in LE, so
there is no point in converting it into BE only for somebody to call bpf_ntohs()
on it later anyway. We are not the only manufacturer that does this.

> > In ice, we receive VLAN tag in descriptor already in LE.
> > Only protocol is BE (network byte order). So I would replace the last 2
> > sentences with the following:
> > 
> > vlan_tag is stored in host byte order, so no byte order conversion is needed.
> 
> Yikes, that was unexpected.  This needs to be heavily documented in docs.

You mean the motivation, why it is so and not the other way around?

> 
> When parsing packets, it is in network-byte-order, else my code is wrong
> here[1]:
> 
>   [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L122
> 
> I'm accessing the skb->vlan_tci here [2], and I notice I don't do any
> byte-order conversions, so fortunately I didn't make a code mistake.
> 
>   [2] https://github.com/xdp-project/bpf-examples/blob/master/traffic-pacing-edt/edt_pacer_vlan.c#L215
>

In a raw packet, the VLAN TCI is in network byte order, but skb requires the
NIC/driver to convert it into host byte order before putting it into the skb.
 
> > vlan_proto is stored in network byte order, the suggested way to use this value:
> > 
> > vlan_proto == bpf_htons(ETH_P_8021Q)
> > 
> > > 
> > > 
> 
> --Jesper
>
Jesper Dangaard Brouer July 7, 2023, 1:57 p.m. UTC | #7
On 06/07/2023 16.46, Larysa Zaremba wrote:
> On Tue, Jul 04, 2023 at 04:18:04PM +0200, Jesper Dangaard Brouer wrote:
>>
>>
>> On 04/07/2023 13.02, Larysa Zaremba wrote:
>>> On Tue, Jul 04, 2023 at 12:23:45PM +0200, Jesper Dangaard Brouer wrote:
>>>>
>>>> On 04/07/2023 10.23, Larysa Zaremba wrote:
>>>>> On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
>>>>>> Larysa Zaremba wrote:
>>>>>>> Implement functionality that enables drivers to expose VLAN tag
>>>>>>> to XDP code.
>>>>>>>
>>>>>>> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
>>>>>>> ---
>>>>>>>     Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
>>>>>>>     include/linux/netdevice.h                    |  2 ++
>>>>>>>     include/net/xdp.h                            |  2 ++
>>>>>>>     kernel/bpf/offload.c                         |  2 ++
>>>>>>>     net/core/xdp.c                               | 20 ++++++++++++++++++++
>>>>>>>     5 files changed, 33 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
>>>>>>> index 25ce72af81c2..ea6dd79a21d3 100644
>>>>>>> --- a/Documentation/networking/xdp-rx-metadata.rst
>>>>>>> +++ b/Documentation/networking/xdp-rx-metadata.rst
>>>>>>> @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
>>>>>>>     metadata is supported, this set will grow:
>>>>>>>     .. kernel-doc:: net/core/xdp.c
>>>>>>> -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
>>>>>>> +   :identifiers: bpf_xdp_metadata_rx_timestamp
>>>>>>> +
>>>>>>> +.. kernel-doc:: net/core/xdp.c
>>>>>>> +   :identifiers: bpf_xdp_metadata_rx_hash
>>>>>>> +
>>>>>>> +.. kernel-doc:: net/core/xdp.c
>>>>>>> +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
>>>>>>>     An XDP program can use these kfuncs to read the metadata into stack
>>>>>>>     variables for its own consumption. Or, to pass the metadata on to other
>>>> [...]
>>>>>>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>>>>>>> index 41e5ca8643ec..f6262c90e45f 100644
>>>>>>> --- a/net/core/xdp.c
>>>>>>> +++ b/net/core/xdp.c
>>>>>>> @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>>>>>>>     	return -EOPNOTSUPP;
>>>>>>>     }
>>>>>>> +/**
>>>>>>> + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
>>>>>>> + * @ctx: XDP context pointer.
>>>>>>> + * @vlan_tag: Destination pointer for VLAN tag
>>>>>>> + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
>>>>>>> + *
>>>>>>> + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
>>>>>>> + * containing VLAN ID, vlan_proto contains protocol identifier.
>>>>>>
>>>>>> Above is a bit confusing to me at least.
>>>>>>
>>>>>> The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
>>>>>> are to be included here? The VlanID or the full 16bit TCI meaning the
>>>>>> PCP+DEI+VID?
>>>>>
>>>>> It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
>>>>> xdp_hw_metadata") this is more clear, because the tag is parsed.
>>>>>
>>>>
>>>> Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
>>>> Protocol IDentifier)?
>>>> I mean, it can basically only have two values[1], and we just wanted to
>>>> know if it is a VLAN (that hardware offloaded/removed for us):
>>>
>>> If we assume everyone follows the standard, this would be correct.
>>> But apparently, some applications use some ambiguous value as a TPID [0].
>>>
>>> So it is not hard to imagine, some NICs could alllow you to configure your
>>> custom TPID. I am not sure if any in-tree drivers actually do this, but I think
>>> it's nice to provide some flexibility on XDP level, especially considering
>>> network stack stores full vlan_proto.
>>>
>>
>> I'm buying your argument, and agree it makes sense to provide TPID in
>> the call signature.  Given weird hardware exists that allow people to
>> configure custom TPID.
>>
>> Looking through kernel defines (in uapi/linux/if_ether.h) I see evidence
>> that funky QinQ EtherTypes have been used in the past:
>>
>>   #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
>> REGISTERED ID ] */
>>   #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
>> REGISTERED ID ] */
>>   #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
>> REGISTERED ID ] */
>>
>>
>>> [0]
>>> https://techhub.hpe.com/eginfolib/networking/docs/switches/7500/5200-1938a_l2-lan_cg/content/495503472.htm
>>>
>>>>
>>>>    static __always_inline int proto_is_vlan(__u16 h_proto)
>>>>    {
>>>> 	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
>>>> 		  h_proto == bpf_htons(ETH_P_8021AD));
>>>>    }
>>>>
>>>> [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79
>>>>
>>>> Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
>>>> (in file include/uapi/linux/if_ether.h)
>>>> Is this actually in use?
>>>> Maybe some hardware can "VLAN" offload this?
>>>>
>>>>
>>>>> What about rephrasing it this way:
>>>>>
>>>>> In case of success, vlan_proto contains VLAN protocol identifier (TPID),
>>>>> vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
>>>>>
>>>>
>>>> Hmm, I think we can improve this further. This text becomes part of the
>>>> documentation for end-users (target audience).  Thus, I think it is
>>>> worth being more verbose and even mention the existing defines that we
>>>> are expecting end-users to take advantage of.
>>>>
>>>> What about:
>>>>
>>>> In case of success. The VLAN EtherType is stored in vlan_proto (usually
>>>> either ETH_P_8021Q or ETH_P_8021AD) also known as TPID (Tag Protocol
>>>> IDentifier). The VLAN tag is stored in vlan_tag, which is a 16-bit field
>>>> containing sub-fields (PCP+DEI+VID). The VLAN ID (VID) is 12-bits
>>>> commonly extracted using mask VLAN_VID_MASK (0x0fff).  For the meaning
>>>> of the sub-fields Priority Code Point (PCP) and Drop Eligible Indicator
>>>> (DEI) (formerly CFI) please reference other documentation. Remember
>>>> these 16-bit fields are stored in network-byte. Thus, transformation
>>>> with byte-order helper functions like bpf_ntohs() are needed.
>>>>
>>>
>>> AFAIK, vlan_tag is stored in host byte order, this is how it is in skb.
>>
>> I'm not sure we should follow SKB storage scheme for XDP.
>>
> 
> I think following SKB convention is a good idea in this particular case. As I
> have mentioned below, in ice VLAN TCI in descriptor already comes in LE, so no
> point in converting it into BE, so somebody would use bpf_ntohs() later anyway.
> We are not the only manufacturer that does this.
> 

As long as other NIC hardware does the same this seems okay.


>>> In ice, we receive VLAN tag in descriptor already in LE.
>>> Only protocol is BE (network byte order). So I would replace the last 2
>>> sentences with the following:
>>>
>>> vlan_tag is stored in host byte order, so no byte order conversion is needed.
>>
>> Yikes, that was unexpected.  This needs to be heavily documented in docs.
> 
> You mean the motivation, why it is so and not the other way around?
> 

No, I don't mean the motivation.
I simply mean write it in *bold*.

Look at the description for bpf_xdp_metadata_rx_hash, how it gets
rendered [1] and how the code comments look [2].

  [1] https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html#general-design
  [2] https://elixir.bootlin.com/linux/v6.4/source/net/core/xdp.c#L724

To save you some time compiling htmldocs target:

  make SPHINXDIRS="networking" V=1  htmldocs

>>
>> When parsing packets, it is in network-byte-order, else my code is wrong
>> here[1]:
>>
>>    [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L122
>>
>> I'm accessing the skb->vlan_tci here [2], and I notice I don't do any
>> byte-order conversions, so fortunately I didn't make a code mistake.
>>
>>    [2] https://github.com/xdp-project/bpf-examples/blob/master/traffic-pacing-edt/edt_pacer_vlan.c#L215
>>
> 
> In raw packet, VLAN TCI is in network byte order, but skb requires NIC/driver
> to convert it into host byte order before putting it into skb.
>

I'm interested in whether *most* NIC hardware delivers this in LE
(Little-Endian), which is host byte order on x86?


>>> vlan_proto is stored in network byte order, the suggested way to use this value:
>>>
>>> vlan_proto == bpf_htons(ETH_P_8021Q)
>>>
>>>>
>>>>
>>
>> --Jesper
>>
>
Larysa Zaremba July 7, 2023, 5:58 p.m. UTC | #8
On Fri, Jul 07, 2023 at 03:57:13PM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 06/07/2023 16.46, Larysa Zaremba wrote:
> > On Tue, Jul 04, 2023 at 04:18:04PM +0200, Jesper Dangaard Brouer wrote:
> > > 
> > > 
> > > On 04/07/2023 13.02, Larysa Zaremba wrote:
> > > > On Tue, Jul 04, 2023 at 12:23:45PM +0200, Jesper Dangaard Brouer wrote:
> > > > > 
> > > > > On 04/07/2023 10.23, Larysa Zaremba wrote:
> > > > > > On Mon, Jul 03, 2023 at 01:15:34PM -0700, John Fastabend wrote:
> > > > > > > Larysa Zaremba wrote:
> > > > > > > > Implement functionality that enables drivers to expose VLAN tag
> > > > > > > > to XDP code.
> > > > > > > > 
> > > > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > > > ---
> > > > > > > >     Documentation/networking/xdp-rx-metadata.rst |  8 +++++++-
> > > > > > > >     include/linux/netdevice.h                    |  2 ++
> > > > > > > >     include/net/xdp.h                            |  2 ++
> > > > > > > >     kernel/bpf/offload.c                         |  2 ++
> > > > > > > >     net/core/xdp.c                               | 20 ++++++++++++++++++++
> > > > > > > >     5 files changed, 33 insertions(+), 1 deletion(-)
> > > > > > > > 
> > > > > > > > diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> > > > > > > > index 25ce72af81c2..ea6dd79a21d3 100644
> > > > > > > > --- a/Documentation/networking/xdp-rx-metadata.rst
> > > > > > > > +++ b/Documentation/networking/xdp-rx-metadata.rst
> > > > > > > > @@ -18,7 +18,13 @@ Currently, the following kfuncs are supported. In the future, as more
> > > > > > > >     metadata is supported, this set will grow:
> > > > > > > >     .. kernel-doc:: net/core/xdp.c
> > > > > > > > -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> > > > > > > > +   :identifiers: bpf_xdp_metadata_rx_timestamp
> > > > > > > > +
> > > > > > > > +.. kernel-doc:: net/core/xdp.c
> > > > > > > > +   :identifiers: bpf_xdp_metadata_rx_hash
> > > > > > > > +
> > > > > > > > +.. kernel-doc:: net/core/xdp.c
> > > > > > > > +   :identifiers: bpf_xdp_metadata_rx_vlan_tag
> > > > > > > >     An XDP program can use these kfuncs to read the metadata into stack
> > > > > > > >     variables for its own consumption. Or, to pass the metadata on to other
> > > > > [...]
> > > > > > > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > > > > > > index 41e5ca8643ec..f6262c90e45f 100644
> > > > > > > > --- a/net/core/xdp.c
> > > > > > > > +++ b/net/core/xdp.c
> > > > > > > > @@ -738,6 +738,26 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> > > > > > > >     	return -EOPNOTSUPP;
> > > > > > > >     }
> > > > > > > > +/**
> > > > > > > > + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
> > > > > > > > + * @ctx: XDP context pointer.
> > > > > > > > + * @vlan_tag: Destination pointer for VLAN tag
> > > > > > > > + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
> > > > > > > > + *
> > > > > > > > + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bytes
> > > > > > > > + * containing VLAN ID, vlan_proto contains protocol identifier.
> > > > > > > 
> > > > > > > Above is a bit confusing to me at least.
> > > > > > > 
> > > > > > > The vlan tag would be both the 16bit TPID and 16bit TCI. What fields
> > > > > > > are to be included here? The VlanID or the full 16bit TCI meaning the
> > > > > > > PCP+DEI+VID?
> > > > > > 
> > > > > > It contains PCP+DEI+VID, in patch 16 ("selftests/bpf: Add flags and new hints to
> > > > > > xdp_hw_metadata") this is more clear, because the tag is parsed.
> > > > > > 
> > > > > 
> > > > > Do we really care about the "EtherType" proto (in VLAN speak TPID = Tag
> > > > > Protocol IDentifier)?
> > > > > I mean, it can basically only have two values[1], and we just wanted to
> > > > > know if it is a VLAN (that hardware offloaded/removed for us):
> > > > 
> > > > If we assume everyone follows the standard, this would be correct.
> > > > But apparently, some applications use some ambiguous value as a TPID [0].
> > > > 
> > > > So it is not hard to imagine, some NICs could alllow you to configure your
> > > > custom TPID. I am not sure if any in-tree drivers actually do this, but I think
> > > > it's nice to provide some flexibility on XDP level, especially considering
> > > > network stack stores full vlan_proto.
> > > > 
> > > 
> > > I'm buying your argument, and agree it makes sense to provide TPID in
> > > the call signature.  Given weird hardware exists that allow people to
> > > configure custom TPID.
> > > 
> > > Looking through kernel defines (in uapi/linux/if_ether.h) I see evidence
> > > that funky QinQ EtherTypes have been used in the past:
> > > 
> > >   #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> > > REGISTERED ID ] */
> > >   #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> > > REGISTERED ID ] */
> > >   #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY
> > > REGISTERED ID ] */
> > > 
> > > 
> > > > [0]
> > > > https://techhub.hpe.com/eginfolib/networking/docs/switches/7500/5200-1938a_l2-lan_cg/content/495503472.htm
> > > > 
> > > > > 
> > > > >    static __always_inline int proto_is_vlan(__u16 h_proto)
> > > > >    {
> > > > > 	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
> > > > > 		  h_proto == bpf_htons(ETH_P_8021AD));
> > > > >    }
> > > > > 
> > > > > [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L75-L79
> > > > > 
> > > > > Cc. Andrew Lunn, as I notice DSA have a fake VLAN define ETH_P_DSA_8021Q
> > > > > (in file include/uapi/linux/if_ether.h)
> > > > > Is this actually in use?
> > > > > Maybe some hardware can "VLAN" offload this?
> > > > > 
> > > > > 
> > > > > > What about rephrasing it this way:
> > > > > > 
> > > > > > In case of success, vlan_proto contains VLAN protocol identifier (TPID),
> > > > > > vlan_tag contains the remaining 16 bits of a 802.1Q tag (PCP+DEI+VID).
> > > > > > 
> > > > > 
> > > > > Hmm, I think we can improve this further. This text becomes part of the
> > > > > documentation for end-users (target audience).  Thus, I think it is
> > > > > worth being more verbose and even mention the existing defines that we
> > > > > are expecting end-users to take advantage of.
> > > > > 
> > > > > What about:
> > > > > 
> > > > > In case of success. The VLAN EtherType is stored in vlan_proto (usually
> > > > > either ETH_P_8021Q or ETH_P_8021AD) also known as TPID (Tag Protocol
> > > > > IDentifier). The VLAN tag is stored in vlan_tag, which is a 16-bit field
> > > > > containing sub-fields (PCP+DEI+VID). The VLAN ID (VID) is 12-bits
> > > > > commonly extracted using mask VLAN_VID_MASK (0x0fff).  For the meaning
> > > > > of the sub-fields Priority Code Point (PCP) and Drop Eligible Indicator
> > > > > (DEI) (formerly CFI) please reference other documentation. Remember
> > > > > these 16-bit fields are stored in network-byte. Thus, transformation
> > > > > with byte-order helper functions like bpf_ntohs() are needed.
> > > > > 
> > > > 
> > > > AFAIK, vlan_tag is stored in host byte order, this is how it is in skb.
> > > 
> > > I'm not sure we should follow SKB storage scheme for XDP.
> > > 
> > 
> > I think following SKB convention is a good idea in this particular case. As I
> > have mentioned below, in ice VLAN TCI in descriptor already comes in LE, so no
> > point in converting it into BE, so somebody would use bpf_ntohs() later anyway.
> > We are not the only manufacturer that does this.
> > 
> 
> As long as other NIC hardware does the same this seems okay.
> 
> 
> > > > In ice, we receive VLAN tag in descriptor already in LE.
> > > > Only protocol is BE (network byte order). So I would replace the last 2
> > > > sentences with the following:
> > > > 
> > > > vlan_tag is stored in host byte order, so no byte order conversion is needed.
> > > 
> > > Yikes, that was unexpected.  This needs to be heavily documented in docs.
> > 
> > You mean the motivation, why it is so and not the other way around?
> > 
> 
> No, I don't mean the motivation.
> I simply mean write it in *bold*.
> 
> Look at the description for bpf_xdp_metadata_rx_hash, how it gets
> rendered [1] and how the code comments look [2].
> 
>  [1] https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html#general-design
>  [2] https://elixir.bootlin.com/linux/v6.4/source/net/core/xdp.c#L724
> 
> To save you some time compiling htmldocs target:
> 
>  make SPHINXDIRS="networking" V=1  htmldocs
> 

Ok, will do :)

> > > 
> > > When parsing packets, it is in network-byte-order, else my code is wrong
> > > here[1]:
> > > 
> > >    [1] https://github.com/xdp-project/bpf-examples/blob/master/include/xdp/parsing_helpers.h#L122
> > > 
> > > I'm accessing the skb->vlan_tci here [2], and I notice I don't do any
> > > byte-order conversions, so fortunately I didn't make a code mistake.
> > > 
> > >    [2] https://github.com/xdp-project/bpf-examples/blob/master/traffic-pacing-edt/edt_pacer_vlan.c#L215
> > > 
> > 
> > In raw packet, VLAN TCI is in network byte order, but skb requires NIC/driver
> > to convert it into host byte order before putting it into skb.
> > 
> 
> I'm interested in if *most* NIC hardware will deliver this in LE
> (Little-Endian) which is host-byte order on x86 ?
>

At least Intel, Pensando and some Broadcom products get the VLAN TCI in LE.
Mellanox gets it in BE.

> 
> > > > vlan_proto is stored in network byte order, the suggested way to use this value:
> > > > 
> > > > vlan_proto == bpf_htons(ETH_P_8021Q)
> > > > 
> > > > > 
> > > > > 
> > > 
> > > --Jesper
> > > 
> > 
>
diff mbox series

Patch

diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index 25ce72af81c2..ea6dd79a21d3 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -18,7 +18,13 @@  Currently, the following kfuncs are supported. In the future, as more
 metadata is supported, this set will grow:
 
 .. kernel-doc:: net/core/xdp.c
-   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
+   :identifiers: bpf_xdp_metadata_rx_timestamp
+
+.. kernel-doc:: net/core/xdp.c
+   :identifiers: bpf_xdp_metadata_rx_hash
+
+.. kernel-doc:: net/core/xdp.c
+   :identifiers: bpf_xdp_metadata_rx_vlan_tag
 
 An XDP program can use these kfuncs to read the metadata into stack
 variables for its own consumption. Or, to pass the metadata on to other
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b828c7a75be2..4fa4380e6d89 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1658,6 +1658,8 @@  struct xdp_metadata_ops {
 	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
 	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
 			       enum xdp_rss_hash_type *rss_type);
+	int	(*xmo_rx_vlan_tag)(const struct xdp_md *ctx, u16 *vlan_tag,
+				   __be16 *vlan_proto);
 };
 
 /**
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 6381560efae2..89c58f56ffc6 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -389,6 +389,8 @@  void xdp_attachment_setup(struct xdp_attachment_info *info,
 			   bpf_xdp_metadata_rx_timestamp) \
 	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
 			   bpf_xdp_metadata_rx_hash) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
+			   bpf_xdp_metadata_rx_vlan_tag) \
 
 enum {
 #define XDP_METADATA_KFUNC(name, _) name,
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8a26cd8814c1..986e7becfd42 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -848,6 +848,8 @@  void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
 		p = ops->xmo_rx_timestamp;
 	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
 		p = ops->xmo_rx_hash;
+	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_VLAN_TAG))
+		p = ops->xmo_rx_vlan_tag;
 out:
 	up_read(&bpf_devs_lock);
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41e5ca8643ec..f6262c90e45f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -738,6 +738,26 @@  __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
 	return -EOPNOTSUPP;
 }
 
+/**
+ * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol
+ * @ctx: XDP context pointer.
+ * @vlan_tag: Destination pointer for VLAN tag
+ * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order.
+ *
+ * In case of success, vlan_tag contains VLAN tag, including 12 least significant bits
+ * containing VLAN ID, vlan_proto contains protocol identifier.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
+ * * ``-ENODATA``    : VLAN tag was not stripped or is not available
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag,
+					     __be16 *vlan_proto)
+{
+	return -EOPNOTSUPP;
+}
+
 __diag_pop();
 
 BTF_SET8_START(xdp_metadata_kfunc_ids)