diff mbox series

[RFC,net-next,3/5] net: dsa: tag_rtl8_4: add realtek 8 byte protocol 4 tag

Message ID 20210822193145.1312668-4-alvin@pqrs.dk (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net: dsa: add support for RTL8365MB-VC | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 8 this patch: 9
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: please write a paragraph that describes the config symbol fully
netdev/build_allmodconfig_warn fail Errors and warnings before: 8 this patch: 9
netdev/header_inline success Link

Commit Message

Alvin Šipraga Aug. 22, 2021, 7:31 p.m. UTC
From: Alvin Šipraga <alsi@bang-olufsen.dk>

This commit implements a basic version of the 8 byte tag protocol used
in the Realtek RTL8365MB-VC switch, which carries with it a protocol
version of 0x04.

The implementation itself only handles the parsing of the EtherType
value and Realtek protocol version, together with the source or
destination port fields. The rest is left unimplemented for now.

The tag format is described in a confidential document provided to my
employer - Bang & Olufsen a/s - by Realtek Semiconductor Corp.
Permission has been granted by Realtek to publish this driver based on
that material, together with an extract from the document describing the
tag format and its fields.  It is hoped that this will help future
implementors who do not have access to the material but who wish to
extend the functionality of drivers for chips which use this protocol.

Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
---
 include/net/dsa.h    |   2 +
 net/dsa/Kconfig      |   6 ++
 net/dsa/Makefile     |   1 +
 net/dsa/tag_rtl8_4.c | 178 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 187 insertions(+)
 create mode 100644 net/dsa/tag_rtl8_4.c

Comments

Andrew Lunn Aug. 22, 2021, 10:02 p.m. UTC | #1
On Sun, Aug 22, 2021 at 09:31:41PM +0200, Alvin Šipraga wrote:
> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
> index 548285539752..470a2f3e8f75 100644
> --- a/net/dsa/Kconfig
> +++ b/net/dsa/Kconfig
> @@ -99,6 +99,12 @@ config NET_DSA_TAG_RTL4_A
>  	  Realtek switches with 4 byte protocol A tags, sich as found in
>  	  the Realtek RTL8366RB.
>  
> +config NET_DSA_TAG_RTL8_4
> +	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
> +	help
> +	  Say Y or M if you want to enable support for tagging frames for Realtek
> +	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
> +

Hi Alvin

This file is sorted based on the tristate text. As such, the
NET_DSA_TAG_RTL4_A is in the wrong place. So i can understand why you
put it here, but place move it after the Qualcom driver.

> @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
>  obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
>  obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
>  obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
> +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
>  obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
>  obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
>  obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o

The CONFIG_NET_DSA_TAG_RTL4_A is also in the wrong place...

> diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
> new file mode 100644
> index 000000000000..1082bafb6a2e
> --- /dev/null
> +++ b/net/dsa/tag_rtl8_4.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handler for Realtek 8 byte switch tags
> + *
> + * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk>
> + *
> + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
> + * named tag_rtl8_4.
> + *
> + * This "proprietary tag" header has the following format:

I think they are all proprietary. At least, there is no
standardization across vendors.

> + *
> + *  -------------------------------------------
> + *  | MAC DA | MAC SA | 8 byte tag | Type | ...
> + *  -------------------------------------------
> + *     _______________/            \______________________________________
> + *    /                                                                   \
> + *  0                                  7|8                                 15
> + *  |-----------------------------------+-----------------------------------|---
> + *  |                               (16-bit)                                | ^
> + *  |                       Realtek EtherType [0x8899]                      | |
> + *  |-----------------------------------+-----------------------------------| 8
> + *  |              (8-bit)              |              (8-bit)              |
> + *  |          Protocol [0x04]          |              REASON               | b
> + *  |-----------------------------------+-----------------------------------| y
> + *  |   (1)  | (1) | (2) |   (1)  | (3) | (1)  | (1) |    (1)    |   (5)    | t
> + *  | FID_EN |  X  | FID | PRI_EN | PRI | KEEP |  X  | LEARN_DIS |    X     | e
> + *  |-----------------------------------+-----------------------------------| s
> + *  |   (1)  |                       (15-bit)                               | |
> + *  |  ALLOW |                        TX/RX                                 | v
> + *  |-----------------------------------+-----------------------------------|---
> + *
> + * With the following field descriptions:
> + *
> + *    field      | description
> + *   ------------+-------------
> + *    Realtek    | 0x8899: indicates that this is a proprietary Realtek tag;
> + *     EtherType |         note that Realtek uses the same EtherType for
> + *               |         other incompatible tag formats (e.g. tag_rtl4_a.c)
> + *    Protocol   | 0x04: indicates that this tag conforms to this format
> + *    X          | reserved
> + *   ------------+-------------
> + *    REASON     | reason for forwarding packet to CPU

Are you allowed to document reason? This could be interesting for some
of the future work.

> + *    FID_EN     | 1: packet has an FID
> + *               | 0: no FID
> + *    FID        | FID of packet (if FID_EN=1)
> + *    PRI_EN     | 1: force priority of packet
> + *               | 0: don't force priority
> + *    PRI        | priority of packet (if PRI_EN=1)
> + *    KEEP       | preserve packet VLAN tag format
> + *    LEARN_DIS  | don't learn the source MAC address of the packet
> + *    ALLOW      | 1: treat TX/RX field as an allowance port mask, meaning the
> + *               |    packet may only be forwarded to ports specified in the
> + *               |    mask
> + *               | 0: no allowance port mask, TX/RX field is the forwarding
> + *               |    port mask
> + *    TX/RX      | TX (switch->CPU): port number the packet was received on
> + *               | RX (CPU->switch): forwarding port mask (if ALLOW=0)
> + *               |                   allowance port mask (if ALLOW=1)

There are some interesting fields here. It will be interesting to see
what we can do with the switch.

> + */
> +
> +#include <linux/etherdevice.h>
> +#include <linux/bits.h>
> +
> +#include "dsa_priv.h"
> +
> +#define RTL8_4_TAG_LEN			8
> +#define RTL8_4_ETHERTYPE		0x8899

Please add this to include/uapi/linux/if_ether.h

> +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
> +				      struct net_device *dev)
> +{
> +	__be16 *p;
> +	u16 etype;
> +	u8 proto;
> +	u8 *tag;
> +	u8 port;
> +	u16 tmp;
> +
> +	if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
> +		return NULL;
> +
> +	tag = dsa_etype_header_pos_rx(skb);
> +
> +	/* Parse Realtek EtherType */
> +	p = (__be16 *)tag;
> +	etype = ntohs(*p);
> +	if (unlikely(etype != RTL8_4_ETHERTYPE)) {
> +		netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);

You probably want to rate limit these sorts of prints. You have
limited controller of what frames come in, and if somebody can craft
bad frames, they can make you send all your time printing messages to
the log.

    Andrew
Vladimir Oltean Aug. 22, 2021, 10:13 p.m. UTC | #2
On Sun, Aug 22, 2021 at 09:31:41PM +0200, Alvin Šipraga wrote:
> From: Alvin Šipraga <alsi@bang-olufsen.dk>
> 
> This commit implements a basic version of the 8 byte tag protocol used
> in the Realtek RTL8365MB-VC switch, which carries with it a protocol
> version of 0x04.
> 
> The implementation itself only handles the parsing of the EtherType
> value and Realtek protocol version, together with the source or
> destination port fields. The rest is left unimplemented for now.
> 
> The tag format is described in a confidential document provided to my
> employer - Bang & Olufsen a/s - by Realtek Semiconductor Corp.
> Permission has been granted by Realtek to publish this driver based on
> that material, together with an extract from the document describing the
> tag format and its fields.  It is hoped that this will help future
> implementors who do not have access to the material but who wish to
> extend the functionality of drivers for chips which use this protocol.
> 
> Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
> ---
>  include/net/dsa.h    |   2 +
>  net/dsa/Kconfig      |   6 ++
>  net/dsa/Makefile     |   1 +
>  net/dsa/tag_rtl8_4.c | 178 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 187 insertions(+)
>  create mode 100644 net/dsa/tag_rtl8_4.c
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 0c2cba45fa79..6d8b5a11d99a 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -51,6 +51,7 @@ struct phylink_link_state;
>  #define DSA_TAG_PROTO_SEVILLE_VALUE		21
>  #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
>  #define DSA_TAG_PROTO_SJA1110_VALUE		23
> +#define DSA_TAG_PROTO_RTL8_4_VALUE		24
>  
>  enum dsa_tag_protocol {
>  	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
> @@ -77,6 +78,7 @@ enum dsa_tag_protocol {
>  	DSA_TAG_PROTO_OCELOT_8021Q	= DSA_TAG_PROTO_OCELOT_8021Q_VALUE,
>  	DSA_TAG_PROTO_SEVILLE		= DSA_TAG_PROTO_SEVILLE_VALUE,
>  	DSA_TAG_PROTO_SJA1110		= DSA_TAG_PROTO_SJA1110_VALUE,
> +	DSA_TAG_PROTO_RTL8_4		= DSA_TAG_PROTO_RTL8_4_VALUE,
>  };
>  
>  struct dsa_switch;
> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
> index 548285539752..470a2f3e8f75 100644
> --- a/net/dsa/Kconfig
> +++ b/net/dsa/Kconfig
> @@ -99,6 +99,12 @@ config NET_DSA_TAG_RTL4_A
>  	  Realtek switches with 4 byte protocol A tags, sich as found in
>  	  the Realtek RTL8366RB.
>  
> +config NET_DSA_TAG_RTL8_4
> +	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
> +	help
> +	  Say Y or M if you want to enable support for tagging frames for Realtek
> +	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
> +
>  config NET_DSA_TAG_OCELOT
>  	tristate "Tag driver for Ocelot family of switches, using NPI port"
>  	depends on MSCC_OCELOT_SWITCH_LIB || \
> diff --git a/net/dsa/Makefile b/net/dsa/Makefile
> index 67ea009f242c..01282817e80e 100644
> --- a/net/dsa/Makefile
> +++ b/net/dsa/Makefile
> @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
>  obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
>  obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
>  obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
> +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
>  obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
>  obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
>  obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
> diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
> new file mode 100644
> index 000000000000..1082bafb6a2e
> --- /dev/null
> +++ b/net/dsa/tag_rtl8_4.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handler for Realtek 8 byte switch tags
> + *
> + * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk>
> + *
> + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
> + * named tag_rtl8_4.
> + *
> + * This "proprietary tag" header has the following format:
> + *
> + *  -------------------------------------------
> + *  | MAC DA | MAC SA | 8 byte tag | Type | ...
> + *  -------------------------------------------
> + *     _______________/            \______________________________________
> + *    /                                                                   \
> + *  0                                  7|8                                 15
> + *  |-----------------------------------+-----------------------------------|---
> + *  |                               (16-bit)                                | ^
> + *  |                       Realtek EtherType [0x8899]                      | |
> + *  |-----------------------------------+-----------------------------------| 8
> + *  |              (8-bit)              |              (8-bit)              |
> + *  |          Protocol [0x04]          |              REASON               | b
> + *  |-----------------------------------+-----------------------------------| y
> + *  |   (1)  | (1) | (2) |   (1)  | (3) | (1)  | (1) |    (1)    |   (5)    | t
> + *  | FID_EN |  X  | FID | PRI_EN | PRI | KEEP |  X  | LEARN_DIS |    X     | e
> + *  |-----------------------------------+-----------------------------------| s
> + *  |   (1)  |                       (15-bit)                               | |
> + *  |  ALLOW |                        TX/RX                                 | v
> + *  |-----------------------------------+-----------------------------------|---
> + *
> + * With the following field descriptions:
> + *
> + *    field      | description
> + *   ------------+-------------
> + *    Realtek    | 0x8899: indicates that this is a proprietary Realtek tag;
> + *     EtherType |         note that Realtek uses the same EtherType for
> + *               |         other incompatible tag formats (e.g. tag_rtl4_a.c)
> + *    Protocol   | 0x04: indicates that this tag conforms to this format
> + *    X          | reserved
> + *   ------------+-------------
> + *    REASON     | reason for forwarding packet to CPU
> + *    FID_EN     | 1: packet has an FID
> + *               | 0: no FID
> + *    FID        | FID of packet (if FID_EN=1)
> + *    PRI_EN     | 1: force priority of packet
> + *               | 0: don't force priority
> + *    PRI        | priority of packet (if PRI_EN=1)
> + *    KEEP       | preserve packet VLAN tag format

What does it mean to preserve packet VLAN tag format? Trying to
understand if the sane thing is to clear or set this bit. Does it mean
to strip the VLAN tag on egress if the VLAN is configured as
egress-untagged on the port?

> + *    LEARN_DIS  | don't learn the source MAC address of the packet
> + *    ALLOW      | 1: treat TX/RX field as an allowance port mask, meaning the
> + *               |    packet may only be forwarded to ports specified in the
> + *               |    mask
> + *               | 0: no allowance port mask, TX/RX field is the forwarding
> + *               |    port mask
> + *    TX/RX      | TX (switch->CPU): port number the packet was received on
> + *               | RX (CPU->switch): forwarding port mask (if ALLOW=0)
> + *               |                   allowance port mask (if ALLOW=1)
> + */
> +
> +#include <linux/etherdevice.h>
> +#include <linux/bits.h>
> +
> +#include "dsa_priv.h"
> +
> +#define RTL8_4_TAG_LEN			8
> +#define RTL8_4_ETHERTYPE		0x8899
> +/* 0x04 = RTL8365MB DSA protocol
> + */
> +#define RTL8_4_PROTOCOL_RTL8365MB	0x04
> +
> +static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
> +				       struct net_device *dev)
> +{
> +	struct dsa_port *dp = dsa_slave_to_port(dev);
> +	__be16 *p;
> +	u8 *tag;
> +	u16 out;
> +
> +	/* Pad out so that the (stripped) packet is at least 64 bytes long
> +	 * (including FCS), otherwise the switch will drop the packet.
> +	 * Then we need an additional 8 bytes for the Realtek tag.
> +	 */
> +	if (__skb_put_padto(skb, ETH_ZLEN + RTL8_4_TAG_LEN, false))
> +		return NULL;
> +
> +	skb_push(skb, RTL8_4_TAG_LEN);
> +
> +	dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
> +	tag = dsa_etype_header_pos_tx(skb);
> +
> +	/* Set Realtek EtherType */
> +	p = (__be16 *)tag;

You would have much fewer (zero) type casts if "tag" was a "__be16 *" in
the first place. Additionally, you would not need "p" and you could work
with tag[0], tag[1], tag[2], tag[3].

> +	*p = htons(RTL8_4_ETHERTYPE);
> +
> +	/* Set Protocol; zero REASON */
> +	p = (__be16 *)(tag + 2);
> +	*p = htons(RTL8_4_PROTOCOL_RTL8365MB << 8);
> +
> +	/* Zero FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */

Please set LEARN_DIS. DSA has better ways to control what needs to be
learned and what doesn't. Packets injected into a standalone port
surely shouldn't have their MAC SA learned. Grep for "tx_fwd_offload" in
the kernel, see what it takes to transmit a packet with
skb->offload_fwd_mark = true, and you can clear LEARN_DIS and set ALLOW
then.

> +	p = (__be16 *)(tag + 4);
> +	*p = 0;
> +
> +	/* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
> +	p = (__be16 *)(tag + 6);
> +	out = BIT(dp->index);

Set but unused variable.

> +	*p = htons(~(1 << 15) & BIT(dp->index));

I am deeply confused by this line.

~(1 << 15) is GENMASK(14, 0)
By AND-ing it with BIT(dp->index), what do you gain?

> +
> +	return skb;
> +}
> +
> +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
> +				      struct net_device *dev)
> +{
> +	__be16 *p;
> +	u16 etype;
> +	u8 proto;
> +	u8 *tag;
> +	u8 port;
> +	u16 tmp;
> +
> +	if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
> +		return NULL;
> +
> +	tag = dsa_etype_header_pos_rx(skb);
> +
> +	/* Parse Realtek EtherType */
> +	p = (__be16 *)tag;

Same comment about it being more productive for "tag" to be "__be16 *".

> +	etype = ntohs(*p);
> +	if (unlikely(etype != RTL8_4_ETHERTYPE)) {
> +		netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
> +		return NULL;
> +	}
> +
> +	/* Parse Protocol; ignore REASON */
> +	p = (__be16 *)(tag + 2);
> +	tmp = ntohs(*p);
> +	proto = tmp >> 8;
> +	if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
> +		netdev_dbg(dev, "unknown realtek protocol 0x%02x\n", proto);
> +		return NULL;
> +	}
> +
> +	/* Ignore FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
> +	p = (__be16 *)(tag + 4);

Delete then?

> +
> +	/* Ignore ALLOW; parse TX (switch->CPU) */
> +	p = (__be16 *)(tag + 6);
> +	tmp = ntohs(*p);
> +	port = tmp & 0xf; /* Port number is the LSB 4 bits */
> +
> +	skb->dev = dsa_master_find_slave(dev, 0, port);
> +	if (!skb->dev) {
> +		netdev_dbg(dev, "could not find slave for port %d\n", port);
> +		return NULL;
> +	}
> +
> +	/* Remove tag and recalculate checksum */
> +	skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
> +
> +	dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
> +
> +	skb->offload_fwd_mark = 1;

At the very least, please use

	dsa_default_offload_fwd_mark(skb);

which does the right thing when the port is not offloading the bridge.

Also tell us more about REASON and ALLOW. Is there a bit in the RX tag
which denotes that the packet was forwarded only to the host?

> +
> +	return skb;
> +}
> +
> +static const struct dsa_device_ops rtl8_4_netdev_ops = {
> +	.name = "rtl8_4",
> +	.proto = DSA_TAG_PROTO_RTL8_4,
> +	.xmit = rtl8_4_tag_xmit,
> +	.rcv = rtl8_4_tag_rcv,
> +	.needed_headroom = RTL8_4_TAG_LEN,
> +};
> +module_dsa_tag_driver(rtl8_4_netdev_ops);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);
> -- 
> 2.32.0
>
Alvin Šipraga Aug. 22, 2021, 10:50 p.m. UTC | #3
Hi Andrew,

On 8/23/21 12:02 AM, Andrew Lunn wrote:
> On Sun, Aug 22, 2021 at 09:31:41PM +0200, Alvin Šipraga wrote:
>> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
>> index 548285539752..470a2f3e8f75 100644
>> --- a/net/dsa/Kconfig
>> +++ b/net/dsa/Kconfig
>> @@ -99,6 +99,12 @@ config NET_DSA_TAG_RTL4_A
>>   	  Realtek switches with 4 byte protocol A tags, sich as found in
>>   	  the Realtek RTL8366RB.
>>   
>> +config NET_DSA_TAG_RTL8_4
>> +	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
>> +	help
>> +	  Say Y or M if you want to enable support for tagging frames for Realtek
>> +	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
>> +
> 
> Hi Alvin
> 
> This file is sorted based on the tristate text. As such, the
> NET_DSA_TAG_RTL4_A is in the wrong place. So i can understand why you
> put it here, but place move it after the Qualcom driver.

Thanks - I'll fix it in v2.

> 
>> @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
>>   obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
>>   obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
>>   obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
>> +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
>>   obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
>>   obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
>>   obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
> 
> The CONFIG_NET_DSA_TAG_RTL4_A is also in the wrong place...

Ditto.

> 
>> diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
>> new file mode 100644
>> index 000000000000..1082bafb6a2e
>> --- /dev/null
>> +++ b/net/dsa/tag_rtl8_4.c
>> @@ -0,0 +1,178 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Handler for Realtek 8 byte switch tags
>> + *
>> + * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk>
>> + *
>> + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
>> + * named tag_rtl8_4.
>> + *
>> + * This "proprietary tag" header has the following format:
> 
> I think they are all proprietary. At least, there is no
> standardization across vendors.

I'll remove in v2.

> 
>> + *
>> + *  -------------------------------------------
>> + *  | MAC DA | MAC SA | 8 byte tag | Type | ...
>> + *  -------------------------------------------
>> + *     _______________/            \______________________________________
>> + *    /                                                                   \
>> + *  0                                  7|8                                 15
>> + *  |-----------------------------------+-----------------------------------|---
>> + *  |                               (16-bit)                                | ^
>> + *  |                       Realtek EtherType [0x8899]                      | |
>> + *  |-----------------------------------+-----------------------------------| 8
>> + *  |              (8-bit)              |              (8-bit)              |
>> + *  |          Protocol [0x04]          |              REASON               | b
>> + *  |-----------------------------------+-----------------------------------| y
>> + *  |   (1)  | (1) | (2) |   (1)  | (3) | (1)  | (1) |    (1)    |   (5)    | t
>> + *  | FID_EN |  X  | FID | PRI_EN | PRI | KEEP |  X  | LEARN_DIS |    X     | e
>> + *  |-----------------------------------+-----------------------------------| s
>> + *  |   (1)  |                       (15-bit)                               | |
>> + *  |  ALLOW |                        TX/RX                                 | v
>> + *  |-----------------------------------+-----------------------------------|---
>> + *
>> + * With the following field descriptions:
>> + *
>> + *    field      | description
>> + *   ------------+-------------
>> + *    Realtek    | 0x8899: indicates that this is a proprietary Realtek tag;
>> + *     EtherType |         note that Realtek uses the same EtherType for
>> + *               |         other incompatible tag formats (e.g. tag_rtl4_a.c)
>> + *    Protocol   | 0x04: indicates that this tag conforms to this format
>> + *    X          | reserved
>> + *   ------------+-------------
>> + *    REASON     | reason for forwarding packet to CPU
> 
> Are you allowed to document reason? This could be interesting for some
> of the future work.

Unfortunately the reason field is undocumented. The vendor driver 
doesn't contain any parsing code for the CPU tag so we are left to 
guess. One obvious example would be trapped packets, since the switch 
lets you configure what to do with those (drop, forward to CPU, etc.).

I have not had a reason to look into this yet, otherwise I would have 
documented whatever I knew about it. Hope it's OK for now.

> 
>> + *    FID_EN     | 1: packet has an FID
>> + *               | 0: no FID
>> + *    FID        | FID of packet (if FID_EN=1)
>> + *    PRI_EN     | 1: force priority of packet
>> + *               | 0: don't force priority
>> + *    PRI        | priority of packet (if PRI_EN=1)
>> + *    KEEP       | preserve packet VLAN tag format
>> + *    LEARN_DIS  | don't learn the source MAC address of the packet
>> + *    ALLOW      | 1: treat TX/RX field as an allowance port mask, meaning the
>> + *               |    packet may only be forwarded to ports specified in the
>> + *               |    mask
>> + *               | 0: no allowance port mask, TX/RX field is the forwarding
>> + *               |    port mask
>> + *    TX/RX      | TX (switch->CPU): port number the packet was received on
>> + *               | RX (CPU->switch): forwarding port mask (if ALLOW=0)
>> + *               |                   allowance port mask (if ALLOW=1)
> 
> There are some interesting fields here. It will be interesting to see
> what we can do with the switch.

This is exactly why I asked for Realtek's permission to publish the 
details. :-)

> 
>> + */
>> +
>> +#include <linux/etherdevice.h>
>> +#include <linux/bits.h>
>> +
>> +#include "dsa_priv.h"
>> +
>> +#define RTL8_4_TAG_LEN			8
>> +#define RTL8_4_ETHERTYPE		0x8899
> 
> Please add this to include/uapi/linux/if_ether.h

I believe Realtek uses this EtherType for a bunch of unrelated 
protocols, so I'm not sure this is a good idea. See [1] for a similar 
discussion on the mailing list a while back. What do you think?

[1] 
https://lore.kernel.org/netdev/CACRpkdYQthFgjwVzHyK3DeYUOdcYyWmdjDPG=Rf9B3VrJ12Rzg@mail.gmail.com/

> 
>> +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
>> +				      struct net_device *dev)
>> +{
>> +	__be16 *p;
>> +	u16 etype;
>> +	u8 proto;
>> +	u8 *tag;
>> +	u8 port;
>> +	u16 tmp;
>> +
>> +	if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
>> +		return NULL;
>> +
>> +	tag = dsa_etype_header_pos_rx(skb);
>> +
>> +	/* Parse Realtek EtherType */
>> +	p = (__be16 *)tag;
>> +	etype = ntohs(*p);
>> +	if (unlikely(etype != RTL8_4_ETHERTYPE)) {
>> +		netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
> 
> You probably want to rate limit these sorts of prints. You have
> limited controller of what frames come in, and if somebody can craft
> bad frames, they can make you send all your time printing messages to
> the log.

OK, I think I saw some rate limited version of netdev_dbg so I'll bring 
that in for v2. Thanks for the tip.

> 
>      Andrew
>
Alvin Šipraga Aug. 22, 2021, 11:11 p.m. UTC | #4
Hi Vladimir,

On 8/23/21 12:13 AM, Vladimir Oltean wrote:
> On Sun, Aug 22, 2021 at 09:31:41PM +0200, Alvin Šipraga wrote:
>> From: Alvin Šipraga <alsi@bang-olufsen.dk>
>>
>> This commit implements a basic version of the 8 byte tag protocol used
>> in the Realtek RTL8365MB-VC switch, which carries with it a protocol
>> version of 0x04.
>>
>> The implementation itself only handles the parsing of the EtherType
>> value and Realtek protocol version, together with the source or
>> destination port fields. The rest is left unimplemented for now.
>>
>> The tag format is described in a confidential document provided to my
>> employer - Bang & Olufsen a/s - by Realtek Semiconductor Corp.
>> Permission has been granted by Realtek to publish this driver based on
>> that material, together with an extract from the document describing the
>> tag format and its fields.  It is hoped that this will help future
>> implementors who do not have access to the material but who wish to
>> extend the functionality of drivers for chips which use this protocol.
>>
>> Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
>> ---
>>   include/net/dsa.h    |   2 +
>>   net/dsa/Kconfig      |   6 ++
>>   net/dsa/Makefile     |   1 +
>>   net/dsa/tag_rtl8_4.c | 178 +++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 187 insertions(+)
>>   create mode 100644 net/dsa/tag_rtl8_4.c
>>
>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>> index 0c2cba45fa79..6d8b5a11d99a 100644
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -51,6 +51,7 @@ struct phylink_link_state;
>>   #define DSA_TAG_PROTO_SEVILLE_VALUE		21
>>   #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
>>   #define DSA_TAG_PROTO_SJA1110_VALUE		23
>> +#define DSA_TAG_PROTO_RTL8_4_VALUE		24
>>   
>>   enum dsa_tag_protocol {
>>   	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
>> @@ -77,6 +78,7 @@ enum dsa_tag_protocol {
>>   	DSA_TAG_PROTO_OCELOT_8021Q	= DSA_TAG_PROTO_OCELOT_8021Q_VALUE,
>>   	DSA_TAG_PROTO_SEVILLE		= DSA_TAG_PROTO_SEVILLE_VALUE,
>>   	DSA_TAG_PROTO_SJA1110		= DSA_TAG_PROTO_SJA1110_VALUE,
>> +	DSA_TAG_PROTO_RTL8_4		= DSA_TAG_PROTO_RTL8_4_VALUE,
>>   };
>>   
>>   struct dsa_switch;
>> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
>> index 548285539752..470a2f3e8f75 100644
>> --- a/net/dsa/Kconfig
>> +++ b/net/dsa/Kconfig
>> @@ -99,6 +99,12 @@ config NET_DSA_TAG_RTL4_A
>>   	  Realtek switches with 4 byte protocol A tags, sich as found in
>>   	  the Realtek RTL8366RB.
>>   
>> +config NET_DSA_TAG_RTL8_4
>> +	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
>> +	help
>> +	  Say Y or M if you want to enable support for tagging frames for Realtek
>> +	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
>> +
>>   config NET_DSA_TAG_OCELOT
>>   	tristate "Tag driver for Ocelot family of switches, using NPI port"
>>   	depends on MSCC_OCELOT_SWITCH_LIB || \
>> diff --git a/net/dsa/Makefile b/net/dsa/Makefile
>> index 67ea009f242c..01282817e80e 100644
>> --- a/net/dsa/Makefile
>> +++ b/net/dsa/Makefile
>> @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
>>   obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
>>   obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
>>   obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
>> +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
>>   obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
>>   obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
>>   obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
>> diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
>> new file mode 100644
>> index 000000000000..1082bafb6a2e
>> --- /dev/null
>> +++ b/net/dsa/tag_rtl8_4.c
>> @@ -0,0 +1,178 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Handler for Realtek 8 byte switch tags
>> + *
>> + * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk>
>> + *
>> + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
>> + * named tag_rtl8_4.
>> + *
>> + * This "proprietary tag" header has the following format:
>> + *
>> + *  -------------------------------------------
>> + *  | MAC DA | MAC SA | 8 byte tag | Type | ...
>> + *  -------------------------------------------
>> + *     _______________/            \______________________________________
>> + *    /                                                                   \
>> + *  0                                  7|8                                 15
>> + *  |-----------------------------------+-----------------------------------|---
>> + *  |                               (16-bit)                                | ^
>> + *  |                       Realtek EtherType [0x8899]                      | |
>> + *  |-----------------------------------+-----------------------------------| 8
>> + *  |              (8-bit)              |              (8-bit)              |
>> + *  |          Protocol [0x04]          |              REASON               | b
>> + *  |-----------------------------------+-----------------------------------| y
>> + *  |   (1)  | (1) | (2) |   (1)  | (3) | (1)  | (1) |    (1)    |   (5)    | t
>> + *  | FID_EN |  X  | FID | PRI_EN | PRI | KEEP |  X  | LEARN_DIS |    X     | e
>> + *  |-----------------------------------+-----------------------------------| s
>> + *  |   (1)  |                       (15-bit)                               | |
>> + *  |  ALLOW |                        TX/RX                                 | v
>> + *  |-----------------------------------+-----------------------------------|---
>> + *
>> + * With the following field descriptions:
>> + *
>> + *    field      | description
>> + *   ------------+-------------
>> + *    Realtek    | 0x8899: indicates that this is a proprietary Realtek tag;
>> + *     EtherType |         note that Realtek uses the same EtherType for
>> + *               |         other incompatible tag formats (e.g. tag_rtl4_a.c)
>> + *    Protocol   | 0x04: indicates that this tag conforms to this format
>> + *    X          | reserved
>> + *   ------------+-------------
>> + *    REASON     | reason for forwarding packet to CPU
>> + *    FID_EN     | 1: packet has an FID
>> + *               | 0: no FID
>> + *    FID        | FID of packet (if FID_EN=1)
>> + *    PRI_EN     | 1: force priority of packet
>> + *               | 0: don't force priority
>> + *    PRI        | priority of packet (if PRI_EN=1)
>> + *    KEEP       | preserve packet VLAN tag format
> 
> What does it mean to preserve packet VLAN tag format? Trying to
> understand if the sane thing is to clear or set this bit. Does it mean
> to strip the VLAN tag on egress if the VLAN is configured as
> egress-untagged on the port?

I suppose you mean "Does it mean _don't_ strip the VLAN tag on egress..."?

I'm not sure what the semantics of this KEEP are. When I configure the 
ports to be egress-untagged, the packets leave the port untagged. When I 
configure the ports without egress-untagged, the packets leave the port 
tagged. This is with the code as you see it - so KEEP=0. If I am to 
hazard a guess, maybe it overrides any port-based egress-untagged 
setting. I will run some tests tomorrow.

> 
>> + *    LEARN_DIS  | don't learn the source MAC address of the packet
>> + *    ALLOW      | 1: treat TX/RX field as an allowance port mask, meaning the
>> + *               |    packet may only be forwarded to ports specified in the
>> + *               |    mask
>> + *               | 0: no allowance port mask, TX/RX field is the forwarding
>> + *               |    port mask
>> + *    TX/RX      | TX (switch->CPU): port number the packet was received on
>> + *               | RX (CPU->switch): forwarding port mask (if ALLOW=0)
>> + *               |                   allowance port mask (if ALLOW=1)
>> + */
>> +
>> +#include <linux/etherdevice.h>
>> +#include <linux/bits.h>
>> +
>> +#include "dsa_priv.h"
>> +
>> +#define RTL8_4_TAG_LEN			8
>> +#define RTL8_4_ETHERTYPE		0x8899
>> +/* 0x04 = RTL8365MB DSA protocol
>> + */
>> +#define RTL8_4_PROTOCOL_RTL8365MB	0x04
>> +
>> +static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
>> +				       struct net_device *dev)
>> +{
>> +	struct dsa_port *dp = dsa_slave_to_port(dev);
>> +	__be16 *p;
>> +	u8 *tag;
>> +	u16 out;
>> +
>> +	/* Pad out so that the (stripped) packet is at least 64 bytes long
>> +	 * (including FCS), otherwise the switch will drop the packet.
>> +	 * Then we need an additional 8 bytes for the Realtek tag.
>> +	 */
>> +	if (__skb_put_padto(skb, ETH_ZLEN + RTL8_4_TAG_LEN, false))
>> +		return NULL;
>> +
>> +	skb_push(skb, RTL8_4_TAG_LEN);
>> +
>> +	dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
>> +	tag = dsa_etype_header_pos_tx(skb);
>> +
>> +	/* Set Realtek EtherType */
>> +	p = (__be16 *)tag;
> 
> You would have much fewer (zero) type casts if "tag" was a "__be16 *" in
> the first place. Additionally, you would not need "p" and you could work
> with tag[0], tag[1], tag[2], tag[3].

Thanks, I'll revisit this in v2.

> 
>> +	*p = htons(RTL8_4_ETHERTYPE);
>> +
>> +	/* Set Protocol; zero REASON */
>> +	p = (__be16 *)(tag + 2);
>> +	*p = htons(RTL8_4_PROTOCOL_RTL8365MB << 8);
>> +
>> +	/* Zero FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
> 
> Please set LEARN_DIS. DSA has better ways to control what needs to be
> learned and what doesn't. Packets injected into a standalone port
> surely shouldn't have their MAC SA learned. Grep for "tx_fwd_offload" in
> the kernel, see what it takes to transmit a packet with
> skb->offload_fwd_mark = true, and you can clear LEARN_DIS and set ALLOW
> then.

OK, I'll take a look.

> 
>> +	p = (__be16 *)(tag + 4);
>> +	*p = 0;
>> +
>> +	/* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
>> +	p = (__be16 *)(tag + 6);
>> +	out = BIT(dp->index);
> 
> Set but unused variable.

Thought I scrubbed this but apparently not. Will address in v2.

> 
>> +	*p = htons(~(1 << 15) & BIT(dp->index));
> 
> I am deeply confused by this line.
> 
> ~(1 << 15) is GENMASK(14, 0)
> By AND-ing it with BIT(dp->index), what do you gain?

Deliberate verbosity for the human who was engaged in writing the 
tagging driver to begin with, but obviously stupid. I'll remove.

> 
>> +
>> +	return skb;
>> +}
>> +
>> +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
>> +				      struct net_device *dev)
>> +{
>> +	__be16 *p;
>> +	u16 etype;
>> +	u8 proto;
>> +	u8 *tag;
>> +	u8 port;
>> +	u16 tmp;
>> +
>> +	if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
>> +		return NULL;
>> +
>> +	tag = dsa_etype_header_pos_rx(skb);
>> +
>> +	/* Parse Realtek EtherType */
>> +	p = (__be16 *)tag;
> 
> Same comment about it being more productive for "tag" to be "__be16 *".

Thanks.

> 
>> +	etype = ntohs(*p);
>> +	if (unlikely(etype != RTL8_4_ETHERTYPE)) {
>> +		netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
>> +		return NULL;
>> +	}
>> +
>> +	/* Parse Protocol; ignore REASON */
>> +	p = (__be16 *)(tag + 2);
>> +	tmp = ntohs(*p);
>> +	proto = tmp >> 8;
>> +	if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
>> +		netdev_dbg(dev, "unknown realtek protocol 0x%02x\n", proto);
>> +		return NULL;
>> +	}
>> +
>> +	/* Ignore FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
>> +	p = (__be16 *)(tag + 4);
> 
> Delete then?

Deliberate verbosity again - but I figure any half-decent compiler will 
optimize this out to begin with. I thought it serves as a perfectly fine 
"add stuff here" notice together with the comment, but I can remove in v2.

> 
>> +
>> +	/* Ignore ALLOW; parse TX (switch->CPU) */
>> +	p = (__be16 *)(tag + 6);
>> +	tmp = ntohs(*p);
>> +	port = tmp & 0xf; /* Port number is the LSB 4 bits */
>> +
>> +	skb->dev = dsa_master_find_slave(dev, 0, port);
>> +	if (!skb->dev) {
>> +		netdev_dbg(dev, "could not find slave for port %d\n", port);
>> +		return NULL;
>> +	}
>> +
>> +	/* Remove tag and recalculate checksum */
>> +	skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
>> +
>> +	dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
>> +
>> +	skb->offload_fwd_mark = 1;
> 
> At the very least, please use
> 
> 	dsa_default_offload_fwd_mark(skb);
> 
> which does the right thing when the port is not offloading the bridge.

Sure. Can you elaborate on what you mean by "at the very least"? Can it 
be improved even further?

> 
> Also tell us more about REASON and ALLOW. Is there a bit in the RX tag
> which denotes that the packet was forwarded only to the host?

As I wrote to Andrew, REASON is undocumented and I have not investigated 
this field yet. I have addressed ALLOW upstairs in this email, but 
suffice to say I am not sure.

> 
>> +
>> +	return skb;
>> +}
>> +
>> +static const struct dsa_device_ops rtl8_4_netdev_ops = {
>> +	.name = "rtl8_4",
>> +	.proto = DSA_TAG_PROTO_RTL8_4,
>> +	.xmit = rtl8_4_tag_xmit,
>> +	.rcv = rtl8_4_tag_rcv,
>> +	.needed_headroom = RTL8_4_TAG_LEN,
>> +};
>> +module_dsa_tag_driver(rtl8_4_netdev_ops);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);
>> -- 
>> 2.32.0
>>
Andrew Lunn Aug. 22, 2021, 11:14 p.m. UTC | #5
> >> + */
> >> +
> >> +#include <linux/etherdevice.h>
> >> +#include <linux/bits.h>
> >> +
> >> +#include "dsa_priv.h"
> >> +
> >> +#define RTL8_4_TAG_LEN			8
> >> +#define RTL8_4_ETHERTYPE		0x8899
> > 
> > Please add this to include/uapi/linux/if_ether.h

Maybe call it ETH_P_REALTEK, and comment /* Multiple Proprietary
protocols */ ?

If you do it in an individual patch, you can explain more in the
commit message about it being used for different protocols by Realtek,
and that no assumptions should be made when trying to decode it.

	  Andrew
Vladimir Oltean Aug. 22, 2021, 11:25 p.m. UTC | #6
On Sun, Aug 22, 2021 at 11:11:21PM +0000, Alvin Šipraga wrote:
> >> + *    KEEP       | preserve packet VLAN tag format
> >
> > What does it mean to preserve packet VLAN tag format? Trying to
> > understand if the sane thing is to clear or set this bit. Does it mean
> > to strip the VLAN tag on egress if the VLAN is configured as
> > egress-untagged on the port?
>
> I suppose you mean "Does it mean _don't_ strip the VLAN tag on egress..."?
>
> I'm not sure what the semantics of this KEEP are. When I configure the
> ports to be egress-untagged, the packets leave the port untagged. When I
> configure the ports without egress-untagged, the packets leave the port
> tagged. This is with the code as you see it - so KEEP=0. If I am to
> hazard a guess, maybe it overrides any port-based egress-untagged
> setting. I will run some tests tomorrow.

Ok, then it makes sense to set KEEP=0 and not override the port settings.

> >
> >> +	*p = htons(~(1 << 15) & BIT(dp->index));
> >
> > I am deeply confused by this line.
> >
> > ~(1 << 15) is GENMASK(14, 0)
> > By AND-ing it with BIT(dp->index), what do you gain?
>
> Deliberate verbosity for the human who was engaged in writing the
> tagging driver to begin with, but obviously stupid. I'll remove.

I wouldn't say "stupid", but it's non-obvious, hard to read and at the same time pointless.
I had to take out the abacus to see if I'm missing something.

> >> +	/* Ignore FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
> >> +	p = (__be16 *)(tag + 4);
> >
> > Delete then?
>
> Deliberate verbosity again - but I figure any half-decent compiler will
> optimize this out to begin with. I thought it serves as a perfectly fine
> "add stuff here" notice together with the comment, but I can remove in v2.

Keeping just the comment is fine, but having the line of code is pretty
pointless. Just like any half-decent compiler will optimize it out, any
developer with half a brain will figure out what to do to parse
FID_EN ... LEARN_DIS thanks to the other comments.

> >
> >> +
> >> +	/* Ignore ALLOW; parse TX (switch->CPU) */
> >> +	p = (__be16 *)(tag + 6);
> >> +	tmp = ntohs(*p);
> >> +	port = tmp & 0xf; /* Port number is the LSB 4 bits */
> >> +
> >> +	skb->dev = dsa_master_find_slave(dev, 0, port);
> >> +	if (!skb->dev) {
> >> +		netdev_dbg(dev, "could not find slave for port %d\n", port);
> >> +		return NULL;
> >> +	}
> >> +
> >> +	/* Remove tag and recalculate checksum */
> >> +	skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
> >> +
> >> +	dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
> >> +
> >> +	skb->offload_fwd_mark = 1;
> >
> > At the very least, please use
> >
> > 	dsa_default_offload_fwd_mark(skb);
> >
> > which does the right thing when the port is not offloading the bridge.
>
> Sure. Can you elaborate on what you mean by "at the very least"? Can it
> be improved even further?

The elaboration is right below. skb->offload_fwd_mark should be set to
zero for packets that have been forwarded only to the host (like packets
that have hit a trapping rule). I guess the switch will denote this
piece of info through the REASON code.

This allows the software bridge data path to know to not flood packets
that have already been flooded by the switch in its hardware data path.

Control packets can still be re-forwarded by the software data path,
even if the switch has trapped/not forwarded them, through the
"group_fwd_mask" option in "man ip-link").

> >
> > Also tell us more about REASON and ALLOW. Is there a bit in the RX tag
> > which denotes that the packet was forwarded only to the host?
>
> As I wrote to Andrew, REASON is undocumented and I have not investigated
> this field yet. I have addressed ALLOW upstairs in this email, but
> suffice to say I am not sure.

On xmit, you have. On rcv (switch->CPU), I am not sure whether the
switch will ever set ALLOW to 1, and what is the meaning of that.
Alvin Šipraga Aug. 22, 2021, 11:27 p.m. UTC | #7
Hi Andrew,

On 8/23/21 1:14 AM, Andrew Lunn wrote:
>>>> + */
>>>> +
>>>> +#include <linux/etherdevice.h>
>>>> +#include <linux/bits.h>
>>>> +
>>>> +#include "dsa_priv.h"
>>>> +
>>>> +#define RTL8_4_TAG_LEN			8
>>>> +#define RTL8_4_ETHERTYPE		0x8899
>>>
>>> Please add this to include/uapi/linux/if_ether.h
> 
> Maybe call it ETH_P_REALTEK, and comment /* Multiple Proprietary
> protocols */ ?
> 
> If you do it in an individual patch, you can explain more in the
> commit message about it being used for different protocols by Realtek,
> and that no assumptions should be made when trying to decode it.

Sounds good, I'll do that in v2.

> 
> 	  Andrew
>
Alvin Šipraga Aug. 22, 2021, 11:37 p.m. UTC | #8
Hi Vladimir,

On 8/23/21 1:25 AM, Vladimir Oltean wrote:
> On Sun, Aug 22, 2021 at 11:11:21PM +0000, Alvin Šipraga wrote:
>>>> + *    KEEP       | preserve packet VLAN tag format
>>>
>>> What does it mean to preserve packet VLAN tag format? Trying to
>>> understand if the sane thing is to clear or set this bit. Does it mean
>>> to strip the VLAN tag on egress if the VLAN is configured as
>>> egress-untagged on the port?
>>
>> I suppose you mean "Does it mean _don't_ strip the VLAN tag on egress..."?
>>
>> I'm not sure what the semantics of this KEEP are. When I configure the
>> ports to be egress-untagged, the packets leave the port untagged. When I
>> configure the ports without egress-untagged, the packets leave the port
>> tagged. This is with the code as you see it - so KEEP=0. If I am to
>> hazard a guess, maybe it overrides any port-based egress-untagged
>> setting. I will run some tests tomorrow.
> 
> Ok, then it makes sense to set KEEP=0 and not override the port settings.

OK, glad you agree.

> 
>>>
>>>> +	*p = htons(~(1 << 15) & BIT(dp->index));
>>>
>>> I am deeply confused by this line.
>>>
>>> ~(1 << 15) is GENMASK(14, 0)
>>> By AND-ing it with BIT(dp->index), what do you gain?
>>
>> Deliberate verbosity for the human who was engaged in writing the
>> tagging driver to begin with, but obviously stupid. I'll remove.
> 
> I wouldn't say "stupid", but it's non-obvious, hard to read and at the same time pointless.
> I had to take out the abacus to see if I'm missing something.
> 
>>>> +	/* Ignore FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
>>>> +	p = (__be16 *)(tag + 4);
>>>
>>> Delete then?
>>
>> Deliberate verbosity again - but I figure any half-decent compiler will
>> optimize this out to begin with. I thought it serves as a perfectly fine
>> "add stuff here" notice together with the comment, but I can remove in v2.
> 
> Keeping just the comment is fine, but having the line of code is pretty
> pointless. Just like any half-decent compiler will optimize it out, any
> developer with half a brain will figure out what to do to parse
> FID_EN ... LEARN_DIS thanks to the other comments.

Point well made :-) I'll clean up in v2. Thanks!

> 
>>>
>>>> +
>>>> +	/* Ignore ALLOW; parse TX (switch->CPU) */
>>>> +	p = (__be16 *)(tag + 6);
>>>> +	tmp = ntohs(*p);
>>>> +	port = tmp & 0xf; /* Port number is the LSB 4 bits */
>>>> +
>>>> +	skb->dev = dsa_master_find_slave(dev, 0, port);
>>>> +	if (!skb->dev) {
>>>> +		netdev_dbg(dev, "could not find slave for port %d\n", port);
>>>> +		return NULL;
>>>> +	}
>>>> +
>>>> +	/* Remove tag and recalculate checksum */
>>>> +	skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
>>>> +
>>>> +	dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
>>>> +
>>>> +	skb->offload_fwd_mark = 1;
>>>
>>> At the very least, please use
>>>
>>> 	dsa_default_offload_fwd_mark(skb);
>>>
>>> which does the right thing when the port is not offloading the bridge.
>>
>> Sure. Can you elaborate on what you mean by "at the very least"? Can it
>> be improved even further?
> 
> The elaboration is right below. skb->offload_fwd_mark should be set to
> zero for packets that have been forwarded only to the host (like packets
> that have hit a trapping rule). I guess the switch will denote this
> piece of info through the REASON code.

Yes, I think it will be communicated in REASON too. I haven't gotten to 
deciphering the contents of this field since it has not been needed so 
far: the ports are fully isolated and all bridging is done in software.

> 
> This allows the software bridge data path to know to not flood packets
> that have already been flooded by the switch in its hardware data path.
> 
> Control packets can still be re-forwarded by the software data path,
> even if the switch has trapped/not forwarded them, through the
> "group_fwd_mask" option in "man ip-link").

Since the driver doesn't support any offloading right now (ports are 
isolated), would you be OK with me just setting 
dsa_default_offload_fwd_mark(skb) for now? I will revisit this in the 
future when I have more time at work to implement some of the offloading 
features, but it's not something I can commit to in the near future.

> 
>>>
>>> Also tell us more about REASON and ALLOW. Is there a bit in the RX tag
>>> which denotes that the packet was forwarded only to the host?
>>
>> As I wrote to Andrew, REASON is undocumented and I have not investigated
>> this field yet. I have addressed ALLOW upstairs in this email, but
>> suffice to say I am not sure.
> 
> On xmit, you have. On rcv (switch->CPU), I am not sure whether the
> switch will ever set ALLOW to 1, and what is the meaning of that.

I think ALLOW is only relevant on xmit (CPU->switch). I can make it more 
clear in the comment in v2.
Vladimir Oltean Aug. 22, 2021, 11:45 p.m. UTC | #9
On Sun, Aug 22, 2021 at 11:37:28PM +0000, Alvin Šipraga wrote:
> >>>> +	skb->offload_fwd_mark = 1;
> >>>
> >>> At the very least, please use
> >>>
> >>> 	dsa_default_offload_fwd_mark(skb);
> >>>
> >>> which does the right thing when the port is not offloading the bridge.
> >>
> >> Sure. Can you elaborate on what you mean by "at the very least"? Can it
> >> be improved even further?
> >
> > The elaboration is right below. skb->offload_fwd_mark should be set to
> > zero for packets that have been forwarded only to the host (like packets
> > that have hit a trapping rule). I guess the switch will denote this
> > piece of info through the REASON code.
>
> Yes, I think it will be communicated in REASON too. I haven't gotten to
> deciphering the contents of this field since it has not been needed so
> far: the ports are fully isolated and all bridging is done in software.

In that case, setting skb->offload_fwd_mark to true is absolutely wrong,
since the bridge is told that no software forwarding should be done
between ports, as it was already done in hardware (see nbp_switchdev_allowed_egress).

I wonder how this has ever worked? Are you completely sure that bridging
is done in software?
Alvin Šipraga Aug. 23, 2021, 12:28 a.m. UTC | #10
On 8/23/21 1:45 AM, Vladimir Oltean wrote:
> On Sun, Aug 22, 2021 at 11:37:28PM +0000, Alvin Šipraga wrote:
>>>>>> +	skb->offload_fwd_mark = 1;
>>>>>
>>>>> At the very least, please use
>>>>>
>>>>> 	dsa_default_offload_fwd_mark(skb);
>>>>>
>>>>> which does the right thing when the port is not offloading the bridge.
>>>>
>>>> Sure. Can you elaborate on what you mean by "at the very least"? Can it
>>>> be improved even further?
>>>
>>> The elaboration is right below. skb->offload_fwd_mark should be set to
>>> zero for packets that have been forwarded only to the host (like packets
>>> that have hit a trapping rule). I guess the switch will denote this
>>> piece of info through the REASON code.
>>
>> Yes, I think it will be communicated in REASON too. I haven't gotten to
>> deciphering the contents of this field since it has not been needed so
>> far: the ports are fully isolated and all bridging is done in software.
> 
> In that case, setting skb->offload_fwd_mark to true is absolutely wrong,
> since the bridge is told that no software forwarding should be done
> between ports, as it was already done in hardware (see nbp_switchdev_allowed_egress).
> 
> I wonder how this has ever worked? Are you completely sure that bridging
> is done in software?

You are absolutely right, and indeed I checked just now and the bridging 
is not working at all.

Deleting the line (i.e. skb->offload_fwd_mark = 0) restores the expected 
bridging behaviour.

Based on what you have said, do I understand correctly that 
offload_fwd_mark shouldn't be set until bridge hardware offloading has 
been implemented?

Thanks for your detailed review so far.
Vladimir Oltean Aug. 23, 2021, 12:31 a.m. UTC | #11
On Mon, Aug 23, 2021 at 12:28:51AM +0000, Alvin Šipraga wrote:
> On 8/23/21 1:45 AM, Vladimir Oltean wrote:
> > On Sun, Aug 22, 2021 at 11:37:28PM +0000, Alvin Šipraga wrote:
> >>>>>> +	skb->offload_fwd_mark = 1;
> >>>>>
> >>>>> At the very least, please use
> >>>>>
> >>>>> 	dsa_default_offload_fwd_mark(skb);
> >>>>>
> >>>>> which does the right thing when the port is not offloading the bridge.
> >>>>
> >>>> Sure. Can you elaborate on what you mean by "at the very least"? Can it
> >>>> be improved even further?
> >>>
> >>> The elaboration is right below. skb->offload_fwd_mark should be set to
> >>> zero for packets that have been forwarded only to the host (like packets
> >>> that have hit a trapping rule). I guess the switch will denote this
> >>> piece of info through the REASON code.
> >>
> >> Yes, I think it will be communicated in REASON too. I haven't gotten to
> >> deciphering the contents of this field since it has not been needed so
> >> far: the ports are fully isolated and all bridging is done in software.
> >
> > In that case, setting skb->offload_fwd_mark to true is absolutely wrong,
> > since the bridge is told that no software forwarding should be done
> > between ports, as it was already done in hardware (see nbp_switchdev_allowed_egress).
> >
> > I wonder how this has ever worked? Are you completely sure that bridging
> > is done in software?
>
> You are absolutely right, and indeed I checked just now and the bridging
> is not working at all.
>
> Deleting the line (i.e. skb->offload_fwd_mark = 0) restores the expected
> bridging behaviour.
>
> Based on what you have said, do I understand correctly that
> offload_fwd_mark shouldn't be set until bridge hardware offloading has
> been implemented?
>
> Thanks for your detailed review so far.

So back to my initial suggestion:

| At the very least, please use
|
| 	dsa_default_offload_fwd_mark(skb);
|
| which does the right thing when the port is not offloading the bridge.

This way, you won't have to touch this code even after you start
implementing .port_bridge_join and .port_bridge_leave. It deals with
both cases.
diff mbox series

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0c2cba45fa79..6d8b5a11d99a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -51,6 +51,7 @@  struct phylink_link_state;
 #define DSA_TAG_PROTO_SEVILLE_VALUE		21
 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
 #define DSA_TAG_PROTO_SJA1110_VALUE		23
+#define DSA_TAG_PROTO_RTL8_4_VALUE		24
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
@@ -77,6 +78,7 @@  enum dsa_tag_protocol {
 	DSA_TAG_PROTO_OCELOT_8021Q	= DSA_TAG_PROTO_OCELOT_8021Q_VALUE,
 	DSA_TAG_PROTO_SEVILLE		= DSA_TAG_PROTO_SEVILLE_VALUE,
 	DSA_TAG_PROTO_SJA1110		= DSA_TAG_PROTO_SJA1110_VALUE,
+	DSA_TAG_PROTO_RTL8_4		= DSA_TAG_PROTO_RTL8_4_VALUE,
 };
 
 struct dsa_switch;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..470a2f3e8f75 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -99,6 +99,12 @@  config NET_DSA_TAG_RTL4_A
 	  Realtek switches with 4 byte protocol A tags, sich as found in
 	  the Realtek RTL8366RB.
 
+config NET_DSA_TAG_RTL8_4
+	tristate "Tag driver for Realtek 8 byte protocol 4 tags"
+	help
+	  Say Y or M if you want to enable support for tagging frames for Realtek
+	  switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
+
 config NET_DSA_TAG_OCELOT
 	tristate "Tag driver for Ocelot family of switches, using NPI port"
 	depends on MSCC_OCELOT_SWITCH_LIB || \
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 67ea009f242c..01282817e80e 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -11,6 +11,7 @@  obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
 obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
 obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
 obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
+obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
 obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
 obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
 obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
new file mode 100644
index 000000000000..1082bafb6a2e
--- /dev/null
+++ b/net/dsa/tag_rtl8_4.c
@@ -0,0 +1,178 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handler for Realtek 8 byte switch tags
+ *
+ * Copyright (C) 2021 Alvin Šipraga <alsi@bang-olufsen.dk>
+ *
+ * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
+ * named tag_rtl8_4.
+ *
+ * This "proprietary tag" header has the following format:
+ *
+ *  -------------------------------------------
+ *  | MAC DA | MAC SA | 8 byte tag | Type | ...
+ *  -------------------------------------------
+ *     _______________/            \______________________________________
+ *    /                                                                   \
+ *  0                                  7|8                                 15
+ *  |-----------------------------------+-----------------------------------|---
+ *  |                               (16-bit)                                | ^
+ *  |                       Realtek EtherType [0x8899]                      | |
+ *  |-----------------------------------+-----------------------------------| 8
+ *  |              (8-bit)              |              (8-bit)              |
+ *  |          Protocol [0x04]          |              REASON               | b
+ *  |-----------------------------------+-----------------------------------| y
+ *  |   (1)  | (1) | (2) |   (1)  | (3) | (1)  | (1) |    (1)    |   (5)    | t
+ *  | FID_EN |  X  | FID | PRI_EN | PRI | KEEP |  X  | LEARN_DIS |    X     | e
+ *  |-----------------------------------+-----------------------------------| s
+ *  |   (1)  |                       (15-bit)                               | |
+ *  |  ALLOW |                        TX/RX                                 | v
+ *  |-----------------------------------+-----------------------------------|---
+ *
+ * With the following field descriptions:
+ *
+ *    field      | description
+ *   ------------+-------------
+ *    Realtek    | 0x8899: indicates that this is a proprietary Realtek tag;
+ *     EtherType |         note that Realtek uses the same EtherType for
+ *               |         other incompatible tag formats (e.g. tag_rtl4_a.c)
+ *    Protocol   | 0x04: indicates that this tag conforms to this format
+ *    X          | reserved
+ *   ------------+-------------
+ *    REASON     | reason for forwarding packet to CPU
+ *    FID_EN     | 1: packet has an FID
+ *               | 0: no FID
+ *    FID        | FID of packet (if FID_EN=1)
+ *    PRI_EN     | 1: force priority of packet
+ *               | 0: don't force priority
+ *    PRI        | priority of packet (if PRI_EN=1)
+ *    KEEP       | preserve packet VLAN tag format
+ *    LEARN_DIS  | don't learn the source MAC address of the packet
+ *    ALLOW      | 1: treat TX/RX field as an allowance port mask, meaning the
+ *               |    packet may only be forwarded to ports specified in the
+ *               |    mask
+ *               | 0: no allowance port mask, TX/RX field is the forwarding
+ *               |    port mask
+ *    TX/RX      | TX (switch->CPU): port number the packet was received on
+ *               | RX (CPU->switch): forwarding port mask (if ALLOW=0)
+ *               |                   allowance port mask (if ALLOW=1)
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/bits.h>
+
+#include "dsa_priv.h"
+
+#define RTL8_4_TAG_LEN			8
+#define RTL8_4_ETHERTYPE		0x8899
+/* 0x04 = RTL8365MB DSA protocol
+ */
+#define RTL8_4_PROTOCOL_RTL8365MB	0x04
+
+static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
+				       struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	__be16 *p;
+	u8 *tag;
+	u16 out;
+
+	/* Pad out so that the (stripped) packet is at least 64 bytes long
+	 * (including FCS), otherwise the switch will drop the packet.
+	 * Then we need an additional 8 bytes for the Realtek tag.
+	 */
+	if (__skb_put_padto(skb, ETH_ZLEN + RTL8_4_TAG_LEN, false))
+		return NULL;
+
+	skb_push(skb, RTL8_4_TAG_LEN);
+
+	dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
+	tag = dsa_etype_header_pos_tx(skb);
+
+	/* Set Realtek EtherType */
+	p = (__be16 *)tag;
+	*p = htons(RTL8_4_ETHERTYPE);
+
+	/* Set Protocol; zero REASON */
+	p = (__be16 *)(tag + 2);
+	*p = htons(RTL8_4_PROTOCOL_RTL8365MB << 8);
+
+	/* Zero FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
+	p = (__be16 *)(tag + 4);
+	*p = 0;
+
+	/* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
+	p = (__be16 *)(tag + 6);
+	out = BIT(dp->index);
+	*p = htons(~(1 << 15) & BIT(dp->index));
+
+	return skb;
+}
+
+static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	__be16 *p;
+	u16 etype;
+	u8 proto;
+	u8 *tag;
+	u8 port;
+	u16 tmp;
+
+	if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
+		return NULL;
+
+	tag = dsa_etype_header_pos_rx(skb);
+
+	/* Parse Realtek EtherType */
+	p = (__be16 *)tag;
+	etype = ntohs(*p);
+	if (unlikely(etype != RTL8_4_ETHERTYPE)) {
+		netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
+		return NULL;
+	}
+
+	/* Parse Protocol; ignore REASON */
+	p = (__be16 *)(tag + 2);
+	tmp = ntohs(*p);
+	proto = tmp >> 8;
+	if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
+		netdev_dbg(dev, "unknown realtek protocol 0x%02x\n", proto);
+		return NULL;
+	}
+
+	/* Ignore FID_EN, FID, PRI_EN, PRI, KEEP, LEARN_DIS */
+	p = (__be16 *)(tag + 4);
+
+	/* Ignore ALLOW; parse TX (switch->CPU) */
+	p = (__be16 *)(tag + 6);
+	tmp = ntohs(*p);
+	port = tmp & 0xf; /* Port number is the LSB 4 bits */
+
+	skb->dev = dsa_master_find_slave(dev, 0, port);
+	if (!skb->dev) {
+		netdev_dbg(dev, "could not find slave for port %d\n", port);
+		return NULL;
+	}
+
+	/* Remove tag and recalculate checksum */
+	skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
+
+	dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
+
+	skb->offload_fwd_mark = 1;
+
+	return skb;
+}
+
+static const struct dsa_device_ops rtl8_4_netdev_ops = {
+	.name = "rtl8_4",
+	.proto = DSA_TAG_PROTO_RTL8_4,
+	.xmit = rtl8_4_tag_xmit,
+	.rcv = rtl8_4_tag_rcv,
+	.needed_headroom = RTL8_4_TAG_LEN,
+};
+module_dsa_tag_driver(rtl8_4_netdev_ops);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);