diff mbox series

[net-next,2/2] net: dsa: realtek: Rewrite RTL8366RB MTU handling

Message ID 20231209-rtl8366rb-mtu-fix-v1-2-df863e2b2b2a@linaro.org (mailing list archive)
State Accepted
Commit d577ca429af36a3aea38d53d11be56dff015dfc9
Delegated to: Netdev Maintainers
Headers show
Series net: dsa: realtek: Two RTL8366RB fixes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1115 this patch: 1115
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 1142 this patch: 1142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1142 this patch: 1142
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 85 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Linus Walleij Dec. 9, 2023, 10:37 p.m. UTC
The MTU callbacks are in layer 1 size, so for example 1500
bytes is a normal setting. Cache this size, and only add
the layer 2 framing right before choosing the setting. On
the CPU port this will however include the DSA tag since
this is transmitted from the parent ethernet interface!

Add the layer 2 overhead such as ethernet and VLAN framing
and FCS before selecting the size in the register.

This will make the code easier to understand.

The rtl8366rb_max_mtu() callback returns a bogus MTU
just subtracting the CPU tag, which is the only thing
we should NOT subtract. Return the correct layer 1
max MTU after removing headers and checksum.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/net/dsa/realtek/rtl8366rb.c | 48 +++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 18 deletions(-)

Comments

Alvin Šipraga Dec. 10, 2023, 1 p.m. UTC | #1
On Sat, Dec 09, 2023 at 11:37:35PM +0100, Linus Walleij wrote:
> The MTU callbacks are in layer 1 size, so for example 1500
> bytes is a normal setting. Cache this size, and only add
> the layer 2 framing right before choosing the setting. On
> the CPU port this will however include the DSA tag since
> this is transmitted from the parent ethernet interface!
> 
> Add the layer 2 overhead such as ethernet and VLAN framing
> and FCS before selecting the size in the register.
> 
> This will make the code easier to understand.
> 
> The rtl8366rb_max_mtu() callback returns a bogus MTU
> just subtracting the CPU tag, which is the only thing
> we should NOT subtract. Return the correct layer 1
> max MTU after removing headers and checksum.
> 
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>

> ---
>  drivers/net/dsa/realtek/rtl8366rb.c | 48 +++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
> index 887afd1392cb..e3b6a470ca67 100644
> --- a/drivers/net/dsa/realtek/rtl8366rb.c
> +++ b/drivers/net/dsa/realtek/rtl8366rb.c
> @@ -15,6 +15,7 @@
>  #include <linux/bitops.h>
>  #include <linux/etherdevice.h>
>  #include <linux/if_bridge.h>
> +#include <linux/if_vlan.h>
>  #include <linux/interrupt.h>
>  #include <linux/irqdomain.h>
>  #include <linux/irqchip/chained_irq.h>
> @@ -929,15 +930,19 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
>  	if (ret)
>  		return ret;
>  
> -	/* Set maximum packet length to 1536 bytes */
> +	/* Set default maximum packet length to 1536 bytes */
>  	ret = regmap_update_bits(priv->map, RTL8366RB_SGCR,
>  				 RTL8366RB_SGCR_MAX_LENGTH_MASK,
>  				 RTL8366RB_SGCR_MAX_LENGTH_1536);
>  	if (ret)
>  		return ret;
> -	for (i = 0; i < RTL8366RB_NUM_PORTS; i++)
> -		/* layer 2 size, see rtl8366rb_change_mtu() */
> -		rb->max_mtu[i] = 1532;
> +	for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
> +		if (i == priv->cpu_port)
> +			/* CPU port need to also accept the tag */
> +			rb->max_mtu[i] = ETH_DATA_LEN + RTL8366RB_CPU_TAG_SIZE;
> +		else
> +			rb->max_mtu[i] = ETH_DATA_LEN;
> +	}
>  
>  	/* Disable learning for all ports */
>  	ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
> @@ -1442,24 +1447,29 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
>  	/* Roof out the MTU for the entire switch to the greatest
>  	 * common denominator: the biggest set for any one port will
>  	 * be the biggest MTU for the switch.
> -	 *
> -	 * The first setting, 1522 bytes, is max IP packet 1500 bytes,
> -	 * plus ethernet header, 1518 bytes, plus CPU tag, 4 bytes.
> -	 * This function should consider the parameter an SDU, so the
> -	 * MTU passed for this setting is 1518 bytes. The same logic
> -	 * of subtracting the DSA tag of 4 bytes apply to the other
> -	 * settings.
>  	 */
> -	max_mtu = 1518;
> +	max_mtu = ETH_DATA_LEN;
>  	for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
>  		if (rb->max_mtu[i] > max_mtu)
>  			max_mtu = rb->max_mtu[i];
>  	}
> -	if (max_mtu <= 1518)
> +
> +	/* Translate to layer 2 size.
> +	 * Add ethernet and (possible) VLAN headers, and checksum to the size.
> +	 * For ETH_DATA_LEN (1500 bytes) this will add up to 1522 bytes.
> +	 */
> +	max_mtu += VLAN_ETH_HLEN;
> +	max_mtu += ETH_FCS_LEN;
> +
> +	if (max_mtu <= 1522)
>  		len = RTL8366RB_SGCR_MAX_LENGTH_1522;
> -	else if (max_mtu > 1518 && max_mtu <= 1532)
> +	else if (max_mtu > 1522 && max_mtu <= 1536)
> +		/* This will be the most common default if using VLAN and
> +		 * CPU tagging on a port as both VLAN and CPU tag will
> +		 * result in 1518 + 4 + 4 = 1526 bytes.
> +		 */
>  		len = RTL8366RB_SGCR_MAX_LENGTH_1536;
> -	else if (max_mtu > 1532 && max_mtu <= 1548)
> +	else if (max_mtu > 1536 && max_mtu <= 1552)
>  		len = RTL8366RB_SGCR_MAX_LENGTH_1552;
>  	else
>  		len = RTL8366RB_SGCR_MAX_LENGTH_16000;
> @@ -1471,10 +1481,12 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
>  
>  static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port)
>  {
> -	/* The max MTU is 16000 bytes, so we subtract the CPU tag
> -	 * and the max presented to the system is 15996 bytes.
> +	/* The max MTU is 16000 bytes, so we subtract the ethernet
> +	 * headers with VLAN and checksum and arrive at
> +	 * 16000 - 18 - 4 = 15978. This does not include the CPU tag
> +	 * since that is added to the requested MTU by the DSA framework.
>  	 */
> -	return 15996;
> +	return 16000 - VLAN_ETH_HLEN - ETH_FCS_LEN;
>  }
>  
>  static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid,
> 
> -- 
> 2.34.1
>
Luiz Angelo Daros de Luca Dec. 11, 2023, 3:14 a.m. UTC | #2
> The MTU callbacks are in layer 1 size, so for example 1500
> bytes is a normal setting. Cache this size, and only add
> the layer 2 framing right before choosing the setting. On
> the CPU port this will however include the DSA tag since
> this is transmitted from the parent ethernet interface!
>
> Add the layer 2 overhead such as ethernet and VLAN framing
> and FCS before selecting the size in the register.
>
> This will make the code easier to understand.
>
> The rtl8366rb_max_mtu() callback returns a bogus MTU
> just subtracting the CPU tag, which is the only thing
> we should NOT subtract. Return the correct layer 1
> max MTU after removing headers and checksum.
>
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

Don't we need a Fixes tag?

> ---
>  drivers/net/dsa/realtek/rtl8366rb.c | 48 +++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
> index 887afd1392cb..e3b6a470ca67 100644
> --- a/drivers/net/dsa/realtek/rtl8366rb.c
> +++ b/drivers/net/dsa/realtek/rtl8366rb.c
> @@ -15,6 +15,7 @@
>  #include <linux/bitops.h>
>  #include <linux/etherdevice.h>
>  #include <linux/if_bridge.h>
> +#include <linux/if_vlan.h>
>  #include <linux/interrupt.h>
>  #include <linux/irqdomain.h>
>  #include <linux/irqchip/chained_irq.h>
> @@ -929,15 +930,19 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
>         if (ret)
>                 return ret;
>
> -       /* Set maximum packet length to 1536 bytes */
> +       /* Set default maximum packet length to 1536 bytes */
>         ret = regmap_update_bits(priv->map, RTL8366RB_SGCR,
>                                  RTL8366RB_SGCR_MAX_LENGTH_MASK,
>                                  RTL8366RB_SGCR_MAX_LENGTH_1536);
>         if (ret)
>                 return ret;
> -       for (i = 0; i < RTL8366RB_NUM_PORTS; i++)
> -               /* layer 2 size, see rtl8366rb_change_mtu() */
> -               rb->max_mtu[i] = 1532;
> +       for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
> +               if (i == priv->cpu_port)
> +                       /* CPU port need to also accept the tag */
> +                       rb->max_mtu[i] = ETH_DATA_LEN + RTL8366RB_CPU_TAG_SIZE;
> +               else
> +                       rb->max_mtu[i] = ETH_DATA_LEN;
> +       }
>
>         /* Disable learning for all ports */
>         ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
> @@ -1442,24 +1447,29 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
>         /* Roof out the MTU for the entire switch to the greatest
>          * common denominator: the biggest set for any one port will
>          * be the biggest MTU for the switch.
> -        *
> -        * The first setting, 1522 bytes, is max IP packet 1500 bytes,
> -        * plus ethernet header, 1518 bytes, plus CPU tag, 4 bytes.
> -        * This function should consider the parameter an SDU, so the
> -        * MTU passed for this setting is 1518 bytes. The same logic
> -        * of subtracting the DSA tag of 4 bytes apply to the other
> -        * settings.
>          */
> -       max_mtu = 1518;
> +       max_mtu = ETH_DATA_LEN;
>         for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
>                 if (rb->max_mtu[i] > max_mtu)
>                         max_mtu = rb->max_mtu[i];
>         }

I'm not sure you need this old code. Whenever you change the MTU in a
user port, it will also call rtl8366rb_change_mtu() for the CPU port
if the max MTU changes. A call to change both the port and the CPU
port makes sense when you need to update something inside the switch
to set the MTU per port. However, these realtek switches only have a
global MTU size for all ports. What I did in rtl8365mb is to ignore
any MTU change except it is related to the CPU port. I hope this is a
"core feature" that you can rely on.

If that works for you, you can also drop the rb->max_mtu and the code
in rtl8366rb_setup(), calling rtl8366rb_change_mtu() for the CPU port
instead.

> -       if (max_mtu <= 1518)
> +
> +       /* Translate to layer 2 size.
> +        * Add ethernet and (possible) VLAN headers, and checksum to the size.
> +        * For ETH_DATA_LEN (1500 bytes) this will add up to 1522 bytes.
> +        */
> +       max_mtu += VLAN_ETH_HLEN;
> +       max_mtu += ETH_FCS_LEN;
> +
> +       if (max_mtu <= 1522)
>                 len = RTL8366RB_SGCR_MAX_LENGTH_1522;
> -       else if (max_mtu > 1518 && max_mtu <= 1532)
> +       else if (max_mtu > 1522 && max_mtu <= 1536)
> +               /* This will be the most common default if using VLAN and
> +                * CPU tagging on a port as both VLAN and CPU tag will
> +                * result in 1518 + 4 + 4 = 1526 bytes.
> +                */
>                 len = RTL8366RB_SGCR_MAX_LENGTH_1536;
> -       else if (max_mtu > 1532 && max_mtu <= 1548)
> +       else if (max_mtu > 1536 && max_mtu <= 1552)
>                 len = RTL8366RB_SGCR_MAX_LENGTH_1552;
>         else
>                 len = RTL8366RB_SGCR_MAX_LENGTH_16000;
> @@ -1471,10 +1481,12 @@ static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
>
>  static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port)
>  {
> -       /* The max MTU is 16000 bytes, so we subtract the CPU tag
> -        * and the max presented to the system is 15996 bytes.
> +       /* The max MTU is 16000 bytes, so we subtract the ethernet
> +        * headers with VLAN and checksum and arrive at
> +        * 16000 - 18 - 4 = 15978. This does not include the CPU tag
> +        * since that is added to the requested MTU by the DSA framework.
>          */
> -       return 15996;
> +       return 16000 - VLAN_ETH_HLEN - ETH_FCS_LEN;
>  }
>
>  static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid,
>
> --
> 2.34.1
>
>
Vladimir Oltean Dec. 11, 2023, 3:30 p.m. UTC | #3
On Mon, Dec 11, 2023 at 12:14:39AM -0300, Luiz Angelo Daros de Luca wrote:
> > The MTU callbacks are in layer 1 size, so for example 1500
> > bytes is a normal setting. Cache this size, and only add
> > the layer 2 framing right before choosing the setting. On
> > the CPU port this will however include the DSA tag since
> > this is transmitted from the parent ethernet interface!
> >
> > Add the layer 2 overhead such as ethernet and VLAN framing
> > and FCS before selecting the size in the register.
> >
> > This will make the code easier to understand.
> >
> > The rtl8366rb_max_mtu() callback returns a bogus MTU
> > just subtracting the CPU tag, which is the only thing
> > we should NOT subtract. Return the correct layer 1
> > max MTU after removing headers and checksum.
> >
> > Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> 
> Don't we need a Fixes tag?

If there's nothing observably broken, no.

> I'm not sure you need this old code. Whenever you change the MTU in a
> user port, it will also call rtl8366rb_change_mtu() for the CPU port
> if the max MTU changes. A call to change both the port and the CPU
> port makes sense when you need to update something inside the switch
> to set the MTU per port. However, these realtek switches only have a
> global MTU size for all ports. What I did in rtl8365mb is to ignore
> any MTU change except it is related to the CPU port. I hope this is a
> "core feature" that you can rely on.

Ha, "core feature" :-/

It is a valid way to simplify the programming of a register that is
global to the switch, when the DSA methods are per port. The largest_mtu
is programmed via DSA_NOTIFIER_MTU to all cascade and CPU ports. So it
makes sense to want to use it. But with a single CPU port, the driver
would program the largest_mtu to hardware once. With 2 CPU ports (not
the case here), twice (although it would still be the same value).

To do as you recommend would still not make it a "core feature".
That would be if DSA were to call a new ds->ops->set_global_mtu() with a
clear contract and expectation about being called once (not once per CPU
port), and with the maximum value only.

Searching through the code to see how widespread the pattern is, I
noticed mv88e6xxx_change_mtu() and I think I found a bug.

static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	int ret = 0;

	/* For families where we don't know how to alter the MTU,
	 * just accept any value up to ETH_DATA_LEN
	 */
	if (!chip->info->ops->port_set_jumbo_size &&
	    !chip->info->ops->set_max_frame_size) {
		if (new_mtu > ETH_DATA_LEN)
			return -EINVAL;

		return 0;
	}

	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
		new_mtu += EDSA_HLEN;

	mv88e6xxx_reg_lock(chip);
	if (chip->info->ops->port_set_jumbo_size)
		ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
	else if (chip->info->ops->set_max_frame_size)
		ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
	mv88e6xxx_reg_unlock(chip);

	return ret;
}

If the chip uses set_max_frame_size() - which is not per port - then it
will accept any latest value, and not look just at the largest_mtu.

b53_change_mtu() also looks like it suffers from a similar problem, it
always programs the latest per-port value to a global register.

So I guess there is ample opportunity to get this wrong, and maybe
making the global MTU "core functionality" is worth considering.
As "net-next" material - I think the bugs are sufficiently artificial,
and workarounds exist, to not bother the stable kernels with fixes over
the existing API.

Would you volunteer to do that?
Luiz Angelo Daros de Luca Dec. 11, 2023, 9:07 p.m. UTC | #4
> > I'm not sure you need this old code. Whenever you change the MTU in a
> > user port, it will also call rtl8366rb_change_mtu() for the CPU port
> > if the max MTU changes. A call to change both the port and the CPU
> > port makes sense when you need to update something inside the switch
> > to set the MTU per port. However, these realtek switches only have a
> > global MTU size for all ports. What I did in rtl8365mb is to ignore
> > any MTU change except it is related to the CPU port. I hope this is a
> > "core feature" that you can rely on.
>
> Ha, "core feature" :-/
>
> It is a valid way to simplify the programming of a register that is
> global to the switch, when the DSA methods are per port. The largest_mtu
> is programmed via DSA_NOTIFIER_MTU to all cascade and CPU ports. So it
> makes sense to want to use it. But with a single CPU port, the driver
> would program the largest_mtu to hardware once. With 2 CPU ports (not
> the case here), twice (although it would still be the same value).

I don't see it as a problem. The DSA API is saying to the driver: this
port must now accept this MTU. And it will send it multiple times, at
least for the user port and one or more CPU ports. The driver must
handle that "message" the best it can. I believe we just need to
describe this behavior to make it "official". I discovered that
behavior empirically and then I read the code. Something like this
would be nice:

/*
* MTU change functionality. Switches can also adjust their MRU through
* this method. By MTU, one understands the SDU (L2 payload) length.
* If the switch needs to account for the DSA tag on the CPU port, this
-* method needs to do so privately.
+* method needs to do so privately. An MTU change will also be
+* propagated to every CPU port when the largest MTU in the switch
+* changes, either up or down. Switches with only a global MTU setting
+* can adjust the MTU based only on these calls targeting CPU ports.
*/
int (*port_change_mtu)(struct dsa_switch *ds, int port,
   int new_mtu);

> To do as you recommend would still not make it a "core feature".
> That would be if DSA were to call a new ds->ops->set_global_mtu() with a
> clear contract and expectation about being called once (not once per CPU
> port), and with the maximum value only.

Do we really need to expand the API? A new ds_ops.set_global_mtu() is
just giving the same but less specific info as
ds_ops.port_change_mtu(). I prefer to work in an API with fewer
functions than more optional functions for each specific HW design. It
just makes writing a new driver more complex. The code in the DSA side
will also be more complex with more code paths when only
set_global_mtu is defined.

If setting the MTU multiple times is a problem for a switch, the
driver must keep track of the current global MTU value and do nothing
when not needed. In the case of rtl8366rb, the MTU works in specific
ranges. In many cases, changing the MTU inside a range would not have
an effect on the switch (although the code is still writing the
register with the same value).

> Searching through the code to see how widespread the pattern is, I
> noticed mv88e6xxx_change_mtu() and I think I found a bug.
>
> static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
> {
>         struct mv88e6xxx_chip *chip = ds->priv;
>         int ret = 0;
>
>         /* For families where we don't know how to alter the MTU,
>          * just accept any value up to ETH_DATA_LEN
>          */
>         if (!chip->info->ops->port_set_jumbo_size &&
>             !chip->info->ops->set_max_frame_size) {
>                 if (new_mtu > ETH_DATA_LEN)
>                         return -EINVAL;
>
>                 return 0;
>         }
>
>         if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
>                 new_mtu += EDSA_HLEN;
>
>         mv88e6xxx_reg_lock(chip);
>         if (chip->info->ops->port_set_jumbo_size)
>                 ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
>         else if (chip->info->ops->set_max_frame_size)
>                 ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
>         mv88e6xxx_reg_unlock(chip);
>
>         return ret;
> }
>
> If the chip uses set_max_frame_size() - which is not per port - then it
> will accept any latest value, and not look just at the largest_mtu.

It might just work as the latest value will be the CPU port that is
guaranteed to be the largest one. However, it might temporarily lower
the global MTU between the user and the CPU MTU change. In order to
fix that, it just needs to ignore user port changes.

> b53_change_mtu() also looks like it suffers from a similar problem, it
> always programs the latest per-port value to a global register.

Latest will, in the end, just work because it is a CPU port and the
MTU the largest one. Maybe the sequence could change in a
multithreaded system but I didn't investigate that further. Anyway,
focusing on the CPU port would just work.

> So I guess there is ample opportunity to get this wrong, and maybe
> making the global MTU "core functionality" is worth considering.
> As "net-next" material - I think the bugs are sufficiently artificial,
> and workarounds exist, to not bother the stable kernels with fixes over
> the existing API.
>
> Would you volunteer to do that?

I don't believe we should expand the API but I will volunteer to
update the port_change_mtu comment if accepted. I can also suggest
changes to other drivers when needed but I prefer to not do that
without a proper HW to test it.

Regards,

Luiz
Florian Fainelli Dec. 11, 2023, 9:24 p.m. UTC | #5
On 12/9/23 14:37, Linus Walleij wrote:
> The MTU callbacks are in layer 1 size, so for example 1500
> bytes is a normal setting. Cache this size, and only add
> the layer 2 framing right before choosing the setting. On
> the CPU port this will however include the DSA tag since
> this is transmitted from the parent ethernet interface!
> 
> Add the layer 2 overhead such as ethernet and VLAN framing
> and FCS before selecting the size in the register.
> 
> This will make the code easier to understand.
> 
> The rtl8366rb_max_mtu() callback returns a bogus MTU
> just subtracting the CPU tag, which is the only thing
> we should NOT subtract. Return the correct layer 1
> max MTU after removing headers and checksum.
> 
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Vladimir Oltean Dec. 12, 2023, 1:01 p.m. UTC | #6
On Mon, Dec 11, 2023 at 06:07:56PM -0300, Luiz Angelo Daros de Luca wrote:
> /*
> * MTU change functionality. Switches can also adjust their MRU through
> * this method. By MTU, one understands the SDU (L2 payload) length.
> * If the switch needs to account for the DSA tag on the CPU port, this
> -* method needs to do so privately.
> +* method needs to do so privately. An MTU change will also be
> +* propagated to every CPU port when the largest MTU in the switch
> +* changes, either up or down. Switches with only a global MTU setting
> +* can adjust the MTU based only on these calls targeting CPU ports.
> */
> int (*port_change_mtu)(struct dsa_switch *ds, int port,
>    int new_mtu);

If only this comment were correct. In cascaded switch trees, we may have
switches which don't have a CPU port. Do as instructed here, and they
will not program the MTU to hardware.

> Do we really need to expand the API? A new ds_ops.set_global_mtu() is
> just giving the same but less specific info as
> ds_ops.port_change_mtu(). I prefer to work in an API with fewer
> functions than more optional functions for each specific HW design. It
> just makes writing a new driver more complex. The code in the DSA side
> will also be more complex with more code paths when only
> set_global_mtu is defined.

The alternative is distributing that complexity to individual drivers,
with more chances of getting it wrong. Let's be honest about it, that
has happened.

> If setting the MTU multiple times is a problem for a switch, the
> driver must keep track of the current global MTU value and do nothing
> when not needed. In the case of rtl8366rb, the MTU works in specific
> ranges. In many cases, changing the MTU inside a range would not have
> an effect on the switch (although the code is still writing the
> register with the same value).

It's not a practical problem except for the wasted time baked into the
design. Just worth mentioning. It's a slow path anyway, not a big deal.

> > If the chip uses set_max_frame_size() - which is not per port - then it
> > will accept any latest value, and not look just at the largest_mtu.
> 
> It might just work as the latest value will be the CPU port that is
> guaranteed to be the largest one. However, it might temporarily lower
> the global MTU between the user and the CPU MTU change. In order to
> fix that, it just needs to ignore user port changes.
> 
> > b53_change_mtu() also looks like it suffers from a similar problem, it
> > always programs the latest per-port value to a global register.
> 
> Latest will, in the end, just work because it is a CPU port and the
> MTU the largest one. Maybe the sequence could change in a
> multithreaded system but I didn't investigate that further. Anyway,
> focusing on the CPU port would just work.

The calling sequence in dsa_user_change_mtu() is single-threaded and
starts with dsa_port_mtu_change(), which is the cross-chip notifier
that programs all CPU and DSA ports, and ends with the direct
ds->ops->port_change_mtu() call which programs the user port.

So I hope you now understand why I'm saying they are buggy. No "might work".

> > So I guess there is ample opportunity to get this wrong, and maybe
> > making the global MTU "core functionality" is worth considering.
> > As "net-next" material - I think the bugs are sufficiently artificial,
> > and workarounds exist, to not bother the stable kernels with fixes over
> > the existing API.
> >
> > Would you volunteer to do that?
> 
> I don't believe we should expand the API but I will volunteer to
> update the port_change_mtu comment if accepted. I can also suggest
> changes to other drivers when needed but I prefer to not do that
> without a proper HW to test it.

With N drivers trying to save the same problem (working around the same
framework design), but in subtly different ways, the responsibility
moves to review to make sure they are all correct.

So the options for switches with global MTU are:

- keep an array of MTUs per port, calculate the maximum, program the
  maximum. This is what RTL8366RB does, you don't like that.

- reverse the calling order in dsa_user_change_mtu() between the
  ds->ops->port_change_mtu() on the user port and the dsa_port_mtu_change()
  cross-chip notifier, such that the order is accidentally fine for
  mv88e6xxx and b53. I don't think this qualifies as the "core
  functionality" you've been asking for, so let's move on.

- only act on MTU changes on the CPU port. A few drivers do this,
  obviously it works for them, and their reliance on the framework is
  reasonable. You're suggesting this as a workaround worth promoting
  to a recommendation in include/net/dsa.h, which won't be a good
  recommendation there as-is.

- ignore MTU changes on user ports, implicitly acting on all CPU and
  DSA (upstream and downstream) ports. Functionally ok as a general
  recommendation, but no driver follows it. You seem to be reluctant to
  make changes to drivers you can't test. The risk with both this and
  the previous option is that developers don't realize they need to
  perform the workaround.

- add a new ds->ops->change_global_mtu() which is called once per switch
  from dsa_switch_mtu(). This eliminates the back and forth between the
  driver and the framework. It would make the framework a bit more
  cumbersome, but the drivers generally simpler, except for mv88e6xxx
  which needs to implement both ds->ops->port_change_mtu() and
  ds->ops->change_global_mtu(). But even there, one needs to consider
  that the chip->info->ops->set_max_frame_size() call path needs fixing,
  and we'd end up having 2 call paths (one returning early for user
  ports) within the same mv88e6xxx_change_mtu() function.

- add a new ds->mtu_is_global bit, similar to ds->vlan_filtering_is_global.
  Document that when set, ds->port_change_mtu() will be called once per
  switch, with an invalid port argument which should be ignored. It has
  the same advantages as the ds->ops->change_global_mtu(), but the
  dsa_switch_ops API is a bit more quirky to save space for a different
  function pointer.
Paolo Abeni Dec. 12, 2023, 1:16 p.m. UTC | #7
On Sat, 2023-12-09 at 23:37 +0100, Linus Walleij wrote:
> The MTU callbacks are in layer 1 size, so for example 1500
> bytes is a normal setting. Cache this size, and only add
> the layer 2 framing right before choosing the setting. On
> the CPU port this will however include the DSA tag since
> this is transmitted from the parent ethernet interface!
> 
> Add the layer 2 overhead such as ethernet and VLAN framing
> and FCS before selecting the size in the register.
> 
> This will make the code easier to understand.
> 
> The rtl8366rb_max_mtu() callback returns a bogus MTU
> just subtracting the CPU tag, which is the only thing
> we should NOT subtract. Return the correct layer 1
> max MTU after removing headers and checksum.
> 
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

FTR, my understanding is that the possible MTU API changes still under
discussion, are somewhat orthogonal to this patch, which is suitable
as-is for net.

I'm applying this fixes right now.

Cheers,

Paolo
Paolo Abeni Dec. 12, 2023, 1:18 p.m. UTC | #8
On Tue, 2023-12-12 at 14:16 +0100, Paolo Abeni wrote:
> On Sat, 2023-12-09 at 23:37 +0100, Linus Walleij wrote:
> > The MTU callbacks are in layer 1 size, so for example 1500
> > bytes is a normal setting. Cache this size, and only add
> > the layer 2 framing right before choosing the setting. On
> > the CPU port this will however include the DSA tag since
> > this is transmitted from the parent ethernet interface!
> > 
> > Add the layer 2 overhead such as ethernet and VLAN framing
> > and FCS before selecting the size in the register.
> > 
> > This will make the code easier to understand.
> > 
> > The rtl8366rb_max_mtu() callback returns a bogus MTU
> > just subtracting the CPU tag, which is the only thing
> > we should NOT subtract. Return the correct layer 1
> > max MTU after removing headers and checksum.
> > 
> > Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> 
> FTR, my understanding is that the possible MTU API changes still under
> discussion, are somewhat orthogonal to this patch, which is suitable
> as-is for net.

Typo above -> 'net-next'.

> 
> I'm applying this fixes right now.
> 
> Cheers,
> 
> Paolo
diff mbox series

Patch

diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 887afd1392cb..e3b6a470ca67 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -15,6 +15,7 @@ 
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip/chained_irq.h>
@@ -929,15 +930,19 @@  static int rtl8366rb_setup(struct dsa_switch *ds)
 	if (ret)
 		return ret;
 
-	/* Set maximum packet length to 1536 bytes */
+	/* Set default maximum packet length to 1536 bytes */
 	ret = regmap_update_bits(priv->map, RTL8366RB_SGCR,
 				 RTL8366RB_SGCR_MAX_LENGTH_MASK,
 				 RTL8366RB_SGCR_MAX_LENGTH_1536);
 	if (ret)
 		return ret;
-	for (i = 0; i < RTL8366RB_NUM_PORTS; i++)
-		/* layer 2 size, see rtl8366rb_change_mtu() */
-		rb->max_mtu[i] = 1532;
+	for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
+		if (i == priv->cpu_port)
+			/* CPU port need to also accept the tag */
+			rb->max_mtu[i] = ETH_DATA_LEN + RTL8366RB_CPU_TAG_SIZE;
+		else
+			rb->max_mtu[i] = ETH_DATA_LEN;
+	}
 
 	/* Disable learning for all ports */
 	ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
@@ -1442,24 +1447,29 @@  static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 	/* Roof out the MTU for the entire switch to the greatest
 	 * common denominator: the biggest set for any one port will
 	 * be the biggest MTU for the switch.
-	 *
-	 * The first setting, 1522 bytes, is max IP packet 1500 bytes,
-	 * plus ethernet header, 1518 bytes, plus CPU tag, 4 bytes.
-	 * This function should consider the parameter an SDU, so the
-	 * MTU passed for this setting is 1518 bytes. The same logic
-	 * of subtracting the DSA tag of 4 bytes apply to the other
-	 * settings.
 	 */
-	max_mtu = 1518;
+	max_mtu = ETH_DATA_LEN;
 	for (i = 0; i < RTL8366RB_NUM_PORTS; i++) {
 		if (rb->max_mtu[i] > max_mtu)
 			max_mtu = rb->max_mtu[i];
 	}
-	if (max_mtu <= 1518)
+
+	/* Translate to layer 2 size.
+	 * Add ethernet and (possible) VLAN headers, and checksum to the size.
+	 * For ETH_DATA_LEN (1500 bytes) this will add up to 1522 bytes.
+	 */
+	max_mtu += VLAN_ETH_HLEN;
+	max_mtu += ETH_FCS_LEN;
+
+	if (max_mtu <= 1522)
 		len = RTL8366RB_SGCR_MAX_LENGTH_1522;
-	else if (max_mtu > 1518 && max_mtu <= 1532)
+	else if (max_mtu > 1522 && max_mtu <= 1536)
+		/* This will be the most common default if using VLAN and
+		 * CPU tagging on a port as both VLAN and CPU tag will
+		 * result in 1518 + 4 + 4 = 1526 bytes.
+		 */
 		len = RTL8366RB_SGCR_MAX_LENGTH_1536;
-	else if (max_mtu > 1532 && max_mtu <= 1548)
+	else if (max_mtu > 1536 && max_mtu <= 1552)
 		len = RTL8366RB_SGCR_MAX_LENGTH_1552;
 	else
 		len = RTL8366RB_SGCR_MAX_LENGTH_16000;
@@ -1471,10 +1481,12 @@  static int rtl8366rb_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 
 static int rtl8366rb_max_mtu(struct dsa_switch *ds, int port)
 {
-	/* The max MTU is 16000 bytes, so we subtract the CPU tag
-	 * and the max presented to the system is 15996 bytes.
+	/* The max MTU is 16000 bytes, so we subtract the ethernet
+	 * headers with VLAN and checksum and arrive at
+	 * 16000 - 18 - 4 = 15978. This does not include the CPU tag
+	 * since that is added to the requested MTU by the DSA framework.
 	 */
-	return 15996;
+	return 16000 - VLAN_ETH_HLEN - ETH_FCS_LEN;
 }
 
 static int rtl8366rb_get_vlan_4k(struct realtek_priv *priv, u32 vid,