diff mbox series

[v2,net-next,07/10] net: dsa: Pass MST state changes to driver

Message ID 20220301100321.951175-8-tobias@waldekranz.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: bridge: Multiple Spanning Trees | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 21 this patch: 21
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang fail Errors and warnings before: 26 this patch: 26
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 21 this patch: 21
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 64 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Tobias Waldekranz March 1, 2022, 10:03 a.m. UTC
Add the usual trampoline functionality from the generic DSA layer down
to the drivers for MST state changes.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 include/net/dsa.h  |  2 ++
 net/dsa/dsa_priv.h |  2 ++
 net/dsa/port.c     | 30 ++++++++++++++++++++++++++++++
 net/dsa/slave.c    |  6 ++++++
 4 files changed, 40 insertions(+)

Comments

Vladimir Oltean March 3, 2022, 10:20 p.m. UTC | #1
On Tue, Mar 01, 2022 at 11:03:18AM +0100, Tobias Waldekranz wrote:
> Add the usual trampoline functionality from the generic DSA layer down
> to the drivers for MST state changes.
> 
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> ---
>  include/net/dsa.h  |  2 ++
>  net/dsa/dsa_priv.h |  2 ++
>  net/dsa/port.c     | 30 ++++++++++++++++++++++++++++++
>  net/dsa/slave.c    |  6 ++++++
>  4 files changed, 40 insertions(+)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index cc8acb01bd9b..096e6e3a8e1e 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -943,6 +943,8 @@ struct dsa_switch_ops {
>  				     struct dsa_bridge bridge);
>  	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
>  				      u8 state);
> +	int	(*port_mst_state_set)(struct dsa_switch *ds, int port,
> +				      const struct switchdev_mst_state *state);
>  	void	(*port_fast_age)(struct dsa_switch *ds, int port);
>  	int	(*port_pre_bridge_flags)(struct dsa_switch *ds, int port,
>  					 struct switchdev_brport_flags flags,
> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
> index 87ec0697e92e..a620e079ebc5 100644
> --- a/net/dsa/dsa_priv.h
> +++ b/net/dsa/dsa_priv.h
> @@ -198,6 +198,8 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
>  void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
>  			       const struct dsa_device_ops *tag_ops);
>  int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
> +int dsa_port_set_mst_state(struct dsa_port *dp,
> +			   const struct switchdev_mst_state *state);
>  int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
>  int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
>  void dsa_port_disable_rt(struct dsa_port *dp);
> diff --git a/net/dsa/port.c b/net/dsa/port.c
> index 5f45cb7d70ba..26cfbc8ab499 100644
> --- a/net/dsa/port.c
> +++ b/net/dsa/port.c
> @@ -108,6 +108,36 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
>  	return 0;
>  }
>  
> +int dsa_port_set_mst_state(struct dsa_port *dp,
> +			   const struct switchdev_mst_state *state)
> +{
> +	struct dsa_switch *ds = dp->ds;
> +	int err, port = dp->index;
> +
> +	if (!ds->ops->port_mst_state_set)
> +		return -EOPNOTSUPP;
> +
> +	err = ds->ops->port_mst_state_set(ds, port, state);
> +	if (err)
> +		return err;
> +
> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
> +		switch (state->state) {
> +		case BR_STATE_DISABLED:
> +		case BR_STATE_BLOCKING:
> +		case BR_STATE_LISTENING:
> +			/* Ideally we would only fast age entries
> +			 * belonging to VLANs controlled by this
> +			 * MST.
> +			 */
> +			dsa_port_fast_age(dp);

Does mv88e6xxx support this? If it does, you might just as well
introduce another variant of ds->ops->port_fast_age() for an msti.

And since it is new code, you could require that drivers _do_ support
configuring learning before they could support MSTP. After all, we don't
want to keep legacy mechanisms in place forever.

> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
>  				   bool do_fast_age)
>  {
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index c6ffcd782b5a..32b006a5b778 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -288,6 +288,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
>  
>  		ret = dsa_port_set_state(dp, attr->u.stp_state, true);
>  		break;
> +	case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
> +		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
> +			return -EOPNOTSUPP;
> +
> +		ret = dsa_port_set_mst_state(dp, &attr->u.mst_state);
> +		break;
>  	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
>  		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
>  			return -EOPNOTSUPP;
> -- 
> 2.25.1
>
Tobias Waldekranz March 10, 2022, 8:54 a.m. UTC | #2
On Fri, Mar 04, 2022 at 00:20, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Tue, Mar 01, 2022 at 11:03:18AM +0100, Tobias Waldekranz wrote:
>> Add the usual trampoline functionality from the generic DSA layer down
>> to the drivers for MST state changes.
>> 
>> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> ---
>>  include/net/dsa.h  |  2 ++
>>  net/dsa/dsa_priv.h |  2 ++
>>  net/dsa/port.c     | 30 ++++++++++++++++++++++++++++++
>>  net/dsa/slave.c    |  6 ++++++
>>  4 files changed, 40 insertions(+)
>> 
>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>> index cc8acb01bd9b..096e6e3a8e1e 100644
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -943,6 +943,8 @@ struct dsa_switch_ops {
>>  				     struct dsa_bridge bridge);
>>  	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
>>  				      u8 state);
>> +	int	(*port_mst_state_set)(struct dsa_switch *ds, int port,
>> +				      const struct switchdev_mst_state *state);
>>  	void	(*port_fast_age)(struct dsa_switch *ds, int port);
>>  	int	(*port_pre_bridge_flags)(struct dsa_switch *ds, int port,
>>  					 struct switchdev_brport_flags flags,
>> diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
>> index 87ec0697e92e..a620e079ebc5 100644
>> --- a/net/dsa/dsa_priv.h
>> +++ b/net/dsa/dsa_priv.h
>> @@ -198,6 +198,8 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
>>  void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
>>  			       const struct dsa_device_ops *tag_ops);
>>  int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
>> +int dsa_port_set_mst_state(struct dsa_port *dp,
>> +			   const struct switchdev_mst_state *state);
>>  int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
>>  int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
>>  void dsa_port_disable_rt(struct dsa_port *dp);
>> diff --git a/net/dsa/port.c b/net/dsa/port.c
>> index 5f45cb7d70ba..26cfbc8ab499 100644
>> --- a/net/dsa/port.c
>> +++ b/net/dsa/port.c
>> @@ -108,6 +108,36 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
>>  	return 0;
>>  }
>>  
>> +int dsa_port_set_mst_state(struct dsa_port *dp,
>> +			   const struct switchdev_mst_state *state)
>> +{
>> +	struct dsa_switch *ds = dp->ds;
>> +	int err, port = dp->index;
>> +
>> +	if (!ds->ops->port_mst_state_set)
>> +		return -EOPNOTSUPP;
>> +
>> +	err = ds->ops->port_mst_state_set(ds, port, state);
>> +	if (err)
>> +		return err;
>> +
>> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>> +		switch (state->state) {
>> +		case BR_STATE_DISABLED:
>> +		case BR_STATE_BLOCKING:
>> +		case BR_STATE_LISTENING:
>> +			/* Ideally we would only fast age entries
>> +			 * belonging to VLANs controlled by this
>> +			 * MST.
>> +			 */
>> +			dsa_port_fast_age(dp);
>
> Does mv88e6xxx support this? If it does, you might just as well
> introduce another variant of ds->ops->port_fast_age() for an msti.

You can limit ATU operations to a particular FID. So the way I see it we
could either have:

int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)

+ Maybe more generic. You could imagine there being a way to trigger
  this operation from userspace for example.
- We would have to keep the VLAN<->MSTI mapping in the DSA layer in
  order to be able to do the fan-out in dsa_port_set_mst_state.

or:

int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)

+ Let's the mapping be an internal affair in the driver.
- Perhaps, less generically useful.

Which one do you prefer? Or is there a hidden third option? :)

> And since it is new code, you could require that drivers _do_ support
> configuring learning before they could support MSTP. After all, we don't
> want to keep legacy mechanisms in place forever.

By "configuring learning", do you mean this new fast-age-per-vid/msti,
or being able to enable/disable learning per port? If it's the latter,
I'm not sure I understand how those two are related.
Vladimir Oltean March 10, 2022, 10:35 a.m. UTC | #3
On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
> >> +		switch (state->state) {
> >> +		case BR_STATE_DISABLED:
> >> +		case BR_STATE_BLOCKING:
> >> +		case BR_STATE_LISTENING:
> >> +			/* Ideally we would only fast age entries
> >> +			 * belonging to VLANs controlled by this
> >> +			 * MST.
> >> +			 */
> >> +			dsa_port_fast_age(dp);
> >
> > Does mv88e6xxx support this? If it does, you might just as well
> > introduce another variant of ds->ops->port_fast_age() for an msti.
> 
> You can limit ATU operations to a particular FID. So the way I see it we
> could either have:
> 
> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
> 
> + Maybe more generic. You could imagine there being a way to trigger
>   this operation from userspace for example.
> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>   order to be able to do the fan-out in dsa_port_set_mst_state.
> 
> or:
> 
> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
> 
> + Let's the mapping be an internal affair in the driver.
> - Perhaps, less generically useful.
> 
> Which one do you prefer? Or is there a hidden third option? :)

Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
keeping VLAN to MSTI associations in the DSA layer. Only if we could
retrieve this mapping from the bridge layer - maybe with something
analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
and zeroes.

The reason why I asked for this is because I'm not sure of the
implications of flushing the entire FDB of the port for a single MSTP
state change. It would trigger temporary useless flooding in other MSTIs
at the very least. There isn't any backwards compatibility concern to
speak of, so we can at least try from the beginning to limit the
flushing to the required VLANs.

What I didn't think about, and will be a problem, is
dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
add a "vid" argument to it, and let drivers call it. Thoughts?

Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
isn't a real problem, I suppose we could keep the "port_fast_age" method.

> > And since it is new code, you could require that drivers _do_ support
> > configuring learning before they could support MSTP. After all, we don't
> > want to keep legacy mechanisms in place forever.
> 
> By "configuring learning", do you mean this new fast-age-per-vid/msti,
> or being able to enable/disable learning per port? If it's the latter,
> I'm not sure I understand how those two are related.

The code from dsa_port_set_state() which you've copied:

	if (!dsa_port_can_configure_learning(dp) ||
	    (do_fast_age && dp->learning)) {

has this explanation:

1. DSA keeps standalone ports in the FORWARDING state.
2. DSA also disables address learning on standalone ports, where this is
   possible (dsa_port_can_configure_learning(dp) == true).
3. When a port joins a bridge, it leaves its FORWARDING state from
   standalone mode and inherits the bridge port's BLOCKING state
4. dsa_port_set_state() treats a port transition from FORWARDING to
   BLOCKING as a transition requiring an FDB flush
5. due to (2), the FDB flush at stage (4) is in fact not needed, because
   the FDB of that port should already be empty. Flushing the FDB may be
   a costly operation for some drivers, so it is avoided if possible.

So this is why the "dsa_port_can_configure_learning()" check is there -
for compatibility with drivers that can't configure learning => they
keep learning enabled also in standalone mode => they need an FDB flush
when a standalone port joins a bridge.

What I'm saying is: for drivers that offload MSTP, let's force them to
get the basics right first (have configurable learning), rather than go
forward forever with a backwards compatibility mode.
Tobias Waldekranz March 10, 2022, 4:05 p.m. UTC | #4
On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
>> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>> >> +		switch (state->state) {
>> >> +		case BR_STATE_DISABLED:
>> >> +		case BR_STATE_BLOCKING:
>> >> +		case BR_STATE_LISTENING:
>> >> +			/* Ideally we would only fast age entries
>> >> +			 * belonging to VLANs controlled by this
>> >> +			 * MST.
>> >> +			 */
>> >> +			dsa_port_fast_age(dp);
>> >
>> > Does mv88e6xxx support this? If it does, you might just as well
>> > introduce another variant of ds->ops->port_fast_age() for an msti.
>> 
>> You can limit ATU operations to a particular FID. So the way I see it we
>> could either have:
>> 
>> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
>> 
>> + Maybe more generic. You could imagine there being a way to trigger
>>   this operation from userspace for example.
>> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>>   order to be able to do the fan-out in dsa_port_set_mst_state.
>> 
>> or:
>> 
>> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
>> 
>> + Let's the mapping be an internal affair in the driver.
>> - Perhaps, less generically useful.
>> 
>> Which one do you prefer? Or is there a hidden third option? :)
>
> Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
> keeping VLAN to MSTI associations in the DSA layer. Only if we could
> retrieve this mapping from the bridge layer - maybe with something
> analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
> passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
> and zeroes.

That can easily be done. Given that, should we go for port_vlan_fast_age
instead? port_msti_fast_age feels like an awkward interface, since I
don't think there is any hardware out there that can actually perform
that operation without internally fanning it out over all affected VIDs
(or FIDs in the case of mv88e6xxx).

> The reason why I asked for this is because I'm not sure of the
> implications of flushing the entire FDB of the port for a single MSTP
> state change. It would trigger temporary useless flooding in other MSTIs
> at the very least. There isn't any backwards compatibility concern to
> speak of, so we can at least try from the beginning to limit the
> flushing to the required VLANs.

Aside from the performance implications of flows being temporarily
flooded I don't think there are any.

I suppose if you've disabled flooding of unknown unicast on that port,
you would loose the flow until you see some return traffic (or when one
side gives up and ARPs). While somewhat esoteric, it would be nice to
handle this case if the hardware supports it.

> What I didn't think about, and will be a problem, is
> dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
> The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
> add a "vid" argument to it, and let drivers call it. Thoughts?

To me, this seems to be another argument in favor of
port_vlan_fast_age. That way you would know the VIDs being flushed at
the DSA layer, and driver writers needn't concern themselves with having
to remember to generate the proper notifications back to the bridge.

> Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
> isn't a real problem, I suppose we could keep the "port_fast_age" method.

What about falling back to it if the driver doesn't support per-VLAN
flushing? Flushing all entries will work in most cases, at the cost of
some temporary flooding. Seems more useful than refusing the offload
completely.

>> > And since it is new code, you could require that drivers _do_ support
>> > configuring learning before they could support MSTP. After all, we don't
>> > want to keep legacy mechanisms in place forever.
>> 
>> By "configuring learning", do you mean this new fast-age-per-vid/msti,
>> or being able to enable/disable learning per port? If it's the latter,
>> I'm not sure I understand how those two are related.
>
> The code from dsa_port_set_state() which you've copied:
>
> 	if (!dsa_port_can_configure_learning(dp) ||
> 	    (do_fast_age && dp->learning)) {
>
> has this explanation:
>
> 1. DSA keeps standalone ports in the FORWARDING state.
> 2. DSA also disables address learning on standalone ports, where this is
>    possible (dsa_port_can_configure_learning(dp) == true).
> 3. When a port joins a bridge, it leaves its FORWARDING state from
>    standalone mode and inherits the bridge port's BLOCKING state
> 4. dsa_port_set_state() treats a port transition from FORWARDING to
>    BLOCKING as a transition requiring an FDB flush
> 5. due to (2), the FDB flush at stage (4) is in fact not needed, because
>    the FDB of that port should already be empty. Flushing the FDB may be
>    a costly operation for some drivers, so it is avoided if possible.
>
> So this is why the "dsa_port_can_configure_learning()" check is there -
> for compatibility with drivers that can't configure learning => they
> keep learning enabled also in standalone mode => they need an FDB flush
> when a standalone port joins a bridge.
>
> What I'm saying is: for drivers that offload MSTP, let's force them to
> get the basics right first (have configurable learning), rather than go
> forward forever with a backwards compatibility mode.

Makes sense, I'll just move it up to the initial capability check.
Vladimir Oltean March 10, 2022, 4:18 p.m. UTC | #5
On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
> >> >> +		switch (state->state) {
> >> >> +		case BR_STATE_DISABLED:
> >> >> +		case BR_STATE_BLOCKING:
> >> >> +		case BR_STATE_LISTENING:
> >> >> +			/* Ideally we would only fast age entries
> >> >> +			 * belonging to VLANs controlled by this
> >> >> +			 * MST.
> >> >> +			 */
> >> >> +			dsa_port_fast_age(dp);
> >> >
> >> > Does mv88e6xxx support this? If it does, you might just as well
> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
> >> 
> >> You can limit ATU operations to a particular FID. So the way I see it we
> >> could either have:
> >> 
> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
> >> 
> >> + Maybe more generic. You could imagine there being a way to trigger
> >>   this operation from userspace for example.
> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
> >> 
> >> or:
> >> 
> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
> >> 
> >> + Let's the mapping be an internal affair in the driver.
> >> - Perhaps, less generically useful.
> >> 
> >> Which one do you prefer? Or is there a hidden third option? :)
> >
> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
> > retrieve this mapping from the bridge layer - maybe with something
> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
> > and zeroes.
> 
> That can easily be done. Given that, should we go for port_vlan_fast_age
> instead? port_msti_fast_age feels like an awkward interface, since I
> don't think there is any hardware out there that can actually perform
> that operation without internally fanning it out over all affected VIDs
> (or FIDs in the case of mv88e6xxx).

Yup, yup. My previous email was all over the place with regard to the
available options, because I wrote it in multiple phases so it wasn't
chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
the most sense if you can implement br_mst_get_info(). Same goes for
dsa_port_notify_bridge_fdb_flush().

> > The reason why I asked for this is because I'm not sure of the
> > implications of flushing the entire FDB of the port for a single MSTP
> > state change. It would trigger temporary useless flooding in other MSTIs
> > at the very least. There isn't any backwards compatibility concern to
> > speak of, so we can at least try from the beginning to limit the
> > flushing to the required VLANs.
> 
> Aside from the performance implications of flows being temporarily
> flooded I don't think there are any.
> 
> I suppose if you've disabled flooding of unknown unicast on that port,
> you would loose the flow until you see some return traffic (or when one
> side gives up and ARPs). While somewhat esoteric, it would be nice to
> handle this case if the hardware supports it.

If by "handle this case" you mean "flush only the affected VLANs", then
yes, I fully agree.

> > What I didn't think about, and will be a problem, is
> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
> > add a "vid" argument to it, and let drivers call it. Thoughts?
> 
> To me, this seems to be another argument in favor of
> port_vlan_fast_age. That way you would know the VIDs being flushed at
> the DSA layer, and driver writers needn't concern themselves with having
> to remember to generate the proper notifications back to the bridge.

See above.

> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
> 
> What about falling back to it if the driver doesn't support per-VLAN
> flushing? Flushing all entries will work in most cases, at the cost of
> some temporary flooding. Seems more useful than refusing the offload
> completely.

So here's what I don't understand. Do you expect a driver other than
mv88e6xxx to do something remotely reasonable under a bridge with MSTP
enabled? The idea being to handle gracefully the case where a port is
BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
just outright not offload that kind of bridge, and only concern
ourselves with what MST-capable drivers can do.
I'm shadowing you with a prototype (and untested so far) MSTP
implementation for the ocelot/felix drivers, and those switches can
flush the MAC table per VLAN too. So I don't see an immediate need to
have a fallback implementation if you'll also provide it for mv88e6xxx.
Let's treat that only if the need arises.

> >> > And since it is new code, you could require that drivers _do_ support
> >> > configuring learning before they could support MSTP. After all, we don't
> >> > want to keep legacy mechanisms in place forever.
> >> 
> >> By "configuring learning", do you mean this new fast-age-per-vid/msti,
> >> or being able to enable/disable learning per port? If it's the latter,
> >> I'm not sure I understand how those two are related.
> >
> > The code from dsa_port_set_state() which you've copied:
> >
> > 	if (!dsa_port_can_configure_learning(dp) ||
> > 	    (do_fast_age && dp->learning)) {
> >
> > has this explanation:
> >
> > 1. DSA keeps standalone ports in the FORWARDING state.
> > 2. DSA also disables address learning on standalone ports, where this is
> >    possible (dsa_port_can_configure_learning(dp) == true).
> > 3. When a port joins a bridge, it leaves its FORWARDING state from
> >    standalone mode and inherits the bridge port's BLOCKING state
> > 4. dsa_port_set_state() treats a port transition from FORWARDING to
> >    BLOCKING as a transition requiring an FDB flush
> > 5. due to (2), the FDB flush at stage (4) is in fact not needed, because
> >    the FDB of that port should already be empty. Flushing the FDB may be
> >    a costly operation for some drivers, so it is avoided if possible.
> >
> > So this is why the "dsa_port_can_configure_learning()" check is there -
> > for compatibility with drivers that can't configure learning => they
> > keep learning enabled also in standalone mode => they need an FDB flush
> > when a standalone port joins a bridge.
> >
> > What I'm saying is: for drivers that offload MSTP, let's force them to
> > get the basics right first (have configurable learning), rather than go
> > forward forever with a backwards compatibility mode.
> 
> Makes sense, I'll just move it up to the initial capability check.
Tobias Waldekranz March 10, 2022, 4:20 p.m. UTC | #6
On Thu, Mar 10, 2022 at 17:05, Tobias Waldekranz <tobias@waldekranz.com> wrote:
> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
>> On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
>>> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>>> >> +		switch (state->state) {
>>> >> +		case BR_STATE_DISABLED:
>>> >> +		case BR_STATE_BLOCKING:
>>> >> +		case BR_STATE_LISTENING:
>>> >> +			/* Ideally we would only fast age entries
>>> >> +			 * belonging to VLANs controlled by this
>>> >> +			 * MST.
>>> >> +			 */
>>> >> +			dsa_port_fast_age(dp);
>>> >
>>> > Does mv88e6xxx support this? If it does, you might just as well
>>> > introduce another variant of ds->ops->port_fast_age() for an msti.
>>> 
>>> You can limit ATU operations to a particular FID. So the way I see it we
>>> could either have:
>>> 
>>> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
>>> 
>>> + Maybe more generic. You could imagine there being a way to trigger
>>>   this operation from userspace for example.
>>> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>>>   order to be able to do the fan-out in dsa_port_set_mst_state.
>>> 
>>> or:
>>> 
>>> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
>>> 
>>> + Let's the mapping be an internal affair in the driver.
>>> - Perhaps, less generically useful.
>>> 
>>> Which one do you prefer? Or is there a hidden third option? :)
>>
>> Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
>> keeping VLAN to MSTI associations in the DSA layer. Only if we could
>> retrieve this mapping from the bridge layer - maybe with something
>> analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
>> passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
>> and zeroes.
>
> That can easily be done. Given that, should we go for port_vlan_fast_age
> instead? port_msti_fast_age feels like an awkward interface, since I
> don't think there is any hardware out there that can actually perform
> that operation without internally fanning it out over all affected VIDs
> (or FIDs in the case of mv88e6xxx).
>
>> The reason why I asked for this is because I'm not sure of the
>> implications of flushing the entire FDB of the port for a single MSTP
>> state change. It would trigger temporary useless flooding in other MSTIs
>> at the very least. There isn't any backwards compatibility concern to
>> speak of, so we can at least try from the beginning to limit the
>> flushing to the required VLANs.
>
> Aside from the performance implications of flows being temporarily
> flooded I don't think there are any.
>
> I suppose if you've disabled flooding of unknown unicast on that port,
> you would loose the flow until you see some return traffic (or when one
> side gives up and ARPs). While somewhat esoteric, it would be nice to
> handle this case if the hardware supports it.
>
>> What I didn't think about, and will be a problem, is
>> dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
>> The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
>> add a "vid" argument to it, and let drivers call it. Thoughts?
>
> To me, this seems to be another argument in favor of
> port_vlan_fast_age. That way you would know the VIDs being flushed at
> the DSA layer, and driver writers needn't concern themselves with having
> to remember to generate the proper notifications back to the bridge.
>
>> Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
>> isn't a real problem, I suppose we could keep the "port_fast_age" method.
>
> What about falling back to it if the driver doesn't support per-VLAN
> flushing? Flushing all entries will work in most cases, at the cost of
> some temporary flooding. Seems more useful than refusing the offload
> completely.

Actually now that I think about it, maybe it is more reasonable to risk
having stale entries in the VLANs where the topology changed, rather
than nuking flows in unrelated VLANs.

>>> > And since it is new code, you could require that drivers _do_ support
>>> > configuring learning before they could support MSTP. After all, we don't
>>> > want to keep legacy mechanisms in place forever.
>>> 
>>> By "configuring learning", do you mean this new fast-age-per-vid/msti,
>>> or being able to enable/disable learning per port? If it's the latter,
>>> I'm not sure I understand how those two are related.
>>
>> The code from dsa_port_set_state() which you've copied:
>>
>> 	if (!dsa_port_can_configure_learning(dp) ||
>> 	    (do_fast_age && dp->learning)) {
>>
>> has this explanation:
>>
>> 1. DSA keeps standalone ports in the FORWARDING state.
>> 2. DSA also disables address learning on standalone ports, where this is
>>    possible (dsa_port_can_configure_learning(dp) == true).
>> 3. When a port joins a bridge, it leaves its FORWARDING state from
>>    standalone mode and inherits the bridge port's BLOCKING state
>> 4. dsa_port_set_state() treats a port transition from FORWARDING to
>>    BLOCKING as a transition requiring an FDB flush
>> 5. due to (2), the FDB flush at stage (4) is in fact not needed, because
>>    the FDB of that port should already be empty. Flushing the FDB may be
>>    a costly operation for some drivers, so it is avoided if possible.
>>
>> So this is why the "dsa_port_can_configure_learning()" check is there -
>> for compatibility with drivers that can't configure learning => they
>> keep learning enabled also in standalone mode => they need an FDB flush
>> when a standalone port joins a bridge.
>>
>> What I'm saying is: for drivers that offload MSTP, let's force them to
>> get the basics right first (have configurable learning), rather than go
>> forward forever with a backwards compatibility mode.
>
> Makes sense, I'll just move it up to the initial capability check.
Tobias Waldekranz March 10, 2022, 10:46 p.m. UTC | #7
On Thu, Mar 10, 2022 at 18:18, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
>> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
>> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
>> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>> >> >> +		switch (state->state) {
>> >> >> +		case BR_STATE_DISABLED:
>> >> >> +		case BR_STATE_BLOCKING:
>> >> >> +		case BR_STATE_LISTENING:
>> >> >> +			/* Ideally we would only fast age entries
>> >> >> +			 * belonging to VLANs controlled by this
>> >> >> +			 * MST.
>> >> >> +			 */
>> >> >> +			dsa_port_fast_age(dp);
>> >> >
>> >> > Does mv88e6xxx support this? If it does, you might just as well
>> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
>> >> 
>> >> You can limit ATU operations to a particular FID. So the way I see it we
>> >> could either have:
>> >> 
>> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
>> >> 
>> >> + Maybe more generic. You could imagine there being a way to trigger
>> >>   this operation from userspace for example.
>> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
>> >> 
>> >> or:
>> >> 
>> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
>> >> 
>> >> + Let's the mapping be an internal affair in the driver.
>> >> - Perhaps, less generically useful.
>> >> 
>> >> Which one do you prefer? Or is there a hidden third option? :)
>> >
>> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
>> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
>> > retrieve this mapping from the bridge layer - maybe with something
>> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
>> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
>> > and zeroes.
>> 
>> That can easily be done. Given that, should we go for port_vlan_fast_age
>> instead? port_msti_fast_age feels like an awkward interface, since I
>> don't think there is any hardware out there that can actually perform
>> that operation without internally fanning it out over all affected VIDs
>> (or FIDs in the case of mv88e6xxx).
>
> Yup, yup. My previous email was all over the place with regard to the
> available options, because I wrote it in multiple phases so it wasn't
> chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
> the most sense if you can implement br_mst_get_info(). Same goes for
> dsa_port_notify_bridge_fdb_flush().
>
>> > The reason why I asked for this is because I'm not sure of the
>> > implications of flushing the entire FDB of the port for a single MSTP
>> > state change. It would trigger temporary useless flooding in other MSTIs
>> > at the very least. There isn't any backwards compatibility concern to
>> > speak of, so we can at least try from the beginning to limit the
>> > flushing to the required VLANs.
>> 
>> Aside from the performance implications of flows being temporarily
>> flooded I don't think there are any.
>> 
>> I suppose if you've disabled flooding of unknown unicast on that port,
>> you would loose the flow until you see some return traffic (or when one
>> side gives up and ARPs). While somewhat esoteric, it would be nice to
>> handle this case if the hardware supports it.
>
> If by "handle this case" you mean "flush only the affected VLANs", then
> yes, I fully agree.
>
>> > What I didn't think about, and will be a problem, is
>> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
>> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
>> > add a "vid" argument to it, and let drivers call it. Thoughts?
>> 
>> To me, this seems to be another argument in favor of
>> port_vlan_fast_age. That way you would know the VIDs being flushed at
>> the DSA layer, and driver writers needn't concern themselves with having
>> to remember to generate the proper notifications back to the bridge.
>
> See above.
>
>> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
>> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
>> 
>> What about falling back to it if the driver doesn't support per-VLAN
>> flushing? Flushing all entries will work in most cases, at the cost of
>> some temporary flooding. Seems more useful than refusing the offload
>> completely.
>
> So here's what I don't understand. Do you expect a driver other than
> mv88e6xxx to do something remotely reasonable under a bridge with MSTP
> enabled? The idea being to handle gracefully the case where a port is
> BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
> just outright not offload that kind of bridge, and only concern
> ourselves with what MST-capable drivers can do.

I think you're right. I was trying to make it easier for other driver
writers, but it will just be more confusing and error prone.

Alright, so v3 will have something like this:

bool dsa_port_can_offload_mst(struct dsa_port *dp)
{
	return ds->ops->vlan_msti_set &&
		ds->ops->port_mst_state_set &&
		ds->ops->port_vlan_fast_age &&
		dsa_port_can_configure_learning(dp);
}

If this returns false, we have two options:

1. Return -EOPNOTSUPP, which the bridge will be unable to discriminate
   from a non-switchdev port saying "I have no idea what you're talking
   about". I.e. the bridge will happily apply the config, but the
   hardware won't match. I don't like this, but it lines up with most
   other stuff.

2. Return a hard error, e.g. -EINVAL/-ENOSYS. This will keep the bridge
   in sync with the hardware and also gives some feedback to the
   user. This seems like the better approach to me, but it is a new kind
   of paradigm.

What do you think?

> I'm shadowing you with a prototype (and untested so far) MSTP
> implementation for the ocelot/felix drivers, and those switches can
> flush the MAC table per VLAN too. So I don't see an immediate need to
> have a fallback implementation if you'll also provide it for mv88e6xxx.
> Let's treat that only if the need arises.

Cool. Agreed, v3 will implement .port_vlan_fast_age for mv88e6xxx.
Vladimir Oltean March 10, 2022, 11:08 p.m. UTC | #8
On Thu, Mar 10, 2022 at 11:46:45PM +0100, Tobias Waldekranz wrote:
> On Thu, Mar 10, 2022 at 18:18, Vladimir Oltean <olteanv@gmail.com> wrote:
> > On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
> >> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
> >> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
> >> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
> >> >> >> +		switch (state->state) {
> >> >> >> +		case BR_STATE_DISABLED:
> >> >> >> +		case BR_STATE_BLOCKING:
> >> >> >> +		case BR_STATE_LISTENING:
> >> >> >> +			/* Ideally we would only fast age entries
> >> >> >> +			 * belonging to VLANs controlled by this
> >> >> >> +			 * MST.
> >> >> >> +			 */
> >> >> >> +			dsa_port_fast_age(dp);
> >> >> >
> >> >> > Does mv88e6xxx support this? If it does, you might just as well
> >> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
> >> >> 
> >> >> You can limit ATU operations to a particular FID. So the way I see it we
> >> >> could either have:
> >> >> 
> >> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
> >> >> 
> >> >> + Maybe more generic. You could imagine there being a way to trigger
> >> >>   this operation from userspace for example.
> >> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
> >> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
> >> >> 
> >> >> or:
> >> >> 
> >> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
> >> >> 
> >> >> + Let's the mapping be an internal affair in the driver.
> >> >> - Perhaps, less generically useful.
> >> >> 
> >> >> Which one do you prefer? Or is there a hidden third option? :)
> >> >
> >> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
> >> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
> >> > retrieve this mapping from the bridge layer - maybe with something
> >> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
> >> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
> >> > and zeroes.
> >> 
> >> That can easily be done. Given that, should we go for port_vlan_fast_age
> >> instead? port_msti_fast_age feels like an awkward interface, since I
> >> don't think there is any hardware out there that can actually perform
> >> that operation without internally fanning it out over all affected VIDs
> >> (or FIDs in the case of mv88e6xxx).
> >
> > Yup, yup. My previous email was all over the place with regard to the
> > available options, because I wrote it in multiple phases so it wasn't
> > chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
> > the most sense if you can implement br_mst_get_info(). Same goes for
> > dsa_port_notify_bridge_fdb_flush().
> >
> >> > The reason why I asked for this is because I'm not sure of the
> >> > implications of flushing the entire FDB of the port for a single MSTP
> >> > state change. It would trigger temporary useless flooding in other MSTIs
> >> > at the very least. There isn't any backwards compatibility concern to
> >> > speak of, so we can at least try from the beginning to limit the
> >> > flushing to the required VLANs.
> >> 
> >> Aside from the performance implications of flows being temporarily
> >> flooded I don't think there are any.
> >> 
> >> I suppose if you've disabled flooding of unknown unicast on that port,
> >> you would loose the flow until you see some return traffic (or when one
> >> side gives up and ARPs). While somewhat esoteric, it would be nice to
> >> handle this case if the hardware supports it.
> >
> > If by "handle this case" you mean "flush only the affected VLANs", then
> > yes, I fully agree.
> >
> >> > What I didn't think about, and will be a problem, is
> >> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
> >> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
> >> > add a "vid" argument to it, and let drivers call it. Thoughts?
> >> 
> >> To me, this seems to be another argument in favor of
> >> port_vlan_fast_age. That way you would know the VIDs being flushed at
> >> the DSA layer, and driver writers needn't concern themselves with having
> >> to remember to generate the proper notifications back to the bridge.
> >
> > See above.
> >
> >> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
> >> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
> >> 
> >> What about falling back to it if the driver doesn't support per-VLAN
> >> flushing? Flushing all entries will work in most cases, at the cost of
> >> some temporary flooding. Seems more useful than refusing the offload
> >> completely.
> >
> > So here's what I don't understand. Do you expect a driver other than
> > mv88e6xxx to do something remotely reasonable under a bridge with MSTP
> > enabled? The idea being to handle gracefully the case where a port is
> > BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
> > just outright not offload that kind of bridge, and only concern
> > ourselves with what MST-capable drivers can do.
> 
> I think you're right. I was trying to make it easier for other driver
> writers, but it will just be more confusing and error prone.
> 
> Alright, so v3 will have something like this:
> 
> bool dsa_port_can_offload_mst(struct dsa_port *dp)
> {
> 	return ds->ops->vlan_msti_set &&
> 		ds->ops->port_mst_state_set &&
> 		ds->ops->port_vlan_fast_age &&
> 		dsa_port_can_configure_learning(dp);
> }
> 
> If this returns false, we have two options:
> 
> 1. Return -EOPNOTSUPP, which the bridge will be unable to discriminate
>    from a non-switchdev port saying "I have no idea what you're talking
>    about". I.e. the bridge will happily apply the config, but the
>    hardware won't match. I don't like this, but it lines up with most
>    other stuff.
> 
> 2. Return a hard error, e.g. -EINVAL/-ENOSYS. This will keep the bridge
>    in sync with the hardware and also gives some feedback to the
>    user. This seems like the better approach to me, but it is a new kind
>    of paradigm.
> 
> What do you think?

Wait, what? It matters a lot where you place the call to
dsa_port_can_offload_mst(), too. You don't have to propagate a hard
error code, either, at least if you make dsa_port_bridge_join() return
-EOPNOTSUPP prior to calling switchdev_bridge_port_offload(), no?
DSA transforms this error code into 0, and dsa_port_offloads_bridge*()
starts returning false, which makes us ignore all MSTP related switchdev
notifiers.
The important part will be to make sure that MSTP is enabled for this
bridge from the get-go (that being the only case in which we can offload
an MSTP aware bridge), and refusing to offload dynamic changes to its
MSTP state. I didn't re-check now, but I think I remember there being
limitations even in the software bridge related to dynamic MSTP mode
changes anyway - there had to not be any port VLANs, which IIUC means
that you actually need to _delete_ the port PVIDs which are automatically
created before you could change the MSTP mode.

This is the model, what's wrong with it? I said "don't offload the
bridge", not "don't offload specific MSTP operations".

> > I'm shadowing you with a prototype (and untested so far) MSTP
> > implementation for the ocelot/felix drivers, and those switches can
> > flush the MAC table per VLAN too. So I don't see an immediate need to
> > have a fallback implementation if you'll also provide it for mv88e6xxx.
> > Let's treat that only if the need arises.
> 
> Cool. Agreed, v3 will implement .port_vlan_fast_age for mv88e6xxx.
Tobias Waldekranz March 10, 2022, 11:59 p.m. UTC | #9
On Fri, Mar 11, 2022 at 01:08, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Thu, Mar 10, 2022 at 11:46:45PM +0100, Tobias Waldekranz wrote:
>> On Thu, Mar 10, 2022 at 18:18, Vladimir Oltean <olteanv@gmail.com> wrote:
>> > On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
>> >> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
>> >> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
>> >> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>> >> >> >> +		switch (state->state) {
>> >> >> >> +		case BR_STATE_DISABLED:
>> >> >> >> +		case BR_STATE_BLOCKING:
>> >> >> >> +		case BR_STATE_LISTENING:
>> >> >> >> +			/* Ideally we would only fast age entries
>> >> >> >> +			 * belonging to VLANs controlled by this
>> >> >> >> +			 * MST.
>> >> >> >> +			 */
>> >> >> >> +			dsa_port_fast_age(dp);
>> >> >> >
>> >> >> > Does mv88e6xxx support this? If it does, you might just as well
>> >> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
>> >> >> 
>> >> >> You can limit ATU operations to a particular FID. So the way I see it we
>> >> >> could either have:
>> >> >> 
>> >> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
>> >> >> 
>> >> >> + Maybe more generic. You could imagine there being a way to trigger
>> >> >>   this operation from userspace for example.
>> >> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>> >> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
>> >> >> 
>> >> >> or:
>> >> >> 
>> >> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
>> >> >> 
>> >> >> + Let's the mapping be an internal affair in the driver.
>> >> >> - Perhaps, less generically useful.
>> >> >> 
>> >> >> Which one do you prefer? Or is there a hidden third option? :)
>> >> >
>> >> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
>> >> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
>> >> > retrieve this mapping from the bridge layer - maybe with something
>> >> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
>> >> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
>> >> > and zeroes.
>> >> 
>> >> That can easily be done. Given that, should we go for port_vlan_fast_age
>> >> instead? port_msti_fast_age feels like an awkward interface, since I
>> >> don't think there is any hardware out there that can actually perform
>> >> that operation without internally fanning it out over all affected VIDs
>> >> (or FIDs in the case of mv88e6xxx).
>> >
>> > Yup, yup. My previous email was all over the place with regard to the
>> > available options, because I wrote it in multiple phases so it wasn't
>> > chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
>> > the most sense if you can implement br_mst_get_info(). Same goes for
>> > dsa_port_notify_bridge_fdb_flush().
>> >
>> >> > The reason why I asked for this is because I'm not sure of the
>> >> > implications of flushing the entire FDB of the port for a single MSTP
>> >> > state change. It would trigger temporary useless flooding in other MSTIs
>> >> > at the very least. There isn't any backwards compatibility concern to
>> >> > speak of, so we can at least try from the beginning to limit the
>> >> > flushing to the required VLANs.
>> >> 
>> >> Aside from the performance implications of flows being temporarily
>> >> flooded I don't think there are any.
>> >> 
>> >> I suppose if you've disabled flooding of unknown unicast on that port,
>> >> you would loose the flow until you see some return traffic (or when one
>> >> side gives up and ARPs). While somewhat esoteric, it would be nice to
>> >> handle this case if the hardware supports it.
>> >
>> > If by "handle this case" you mean "flush only the affected VLANs", then
>> > yes, I fully agree.
>> >
>> >> > What I didn't think about, and will be a problem, is
>> >> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
>> >> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
>> >> > add a "vid" argument to it, and let drivers call it. Thoughts?
>> >> 
>> >> To me, this seems to be another argument in favor of
>> >> port_vlan_fast_age. That way you would know the VIDs being flushed at
>> >> the DSA layer, and driver writers needn't concern themselves with having
>> >> to remember to generate the proper notifications back to the bridge.
>> >
>> > See above.
>> >
>> >> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
>> >> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
>> >> 
>> >> What about falling back to it if the driver doesn't support per-VLAN
>> >> flushing? Flushing all entries will work in most cases, at the cost of
>> >> some temporary flooding. Seems more useful than refusing the offload
>> >> completely.
>> >
>> > So here's what I don't understand. Do you expect a driver other than
>> > mv88e6xxx to do something remotely reasonable under a bridge with MSTP
>> > enabled? The idea being to handle gracefully the case where a port is
>> > BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
>> > just outright not offload that kind of bridge, and only concern
>> > ourselves with what MST-capable drivers can do.
>> 
>> I think you're right. I was trying to make it easier for other driver
>> writers, but it will just be more confusing and error prone.
>> 
>> Alright, so v3 will have something like this:
>> 
>> bool dsa_port_can_offload_mst(struct dsa_port *dp)
>> {
>> 	return ds->ops->vlan_msti_set &&
>> 		ds->ops->port_mst_state_set &&
>> 		ds->ops->port_vlan_fast_age &&
>> 		dsa_port_can_configure_learning(dp);
>> }
>> 
>> If this returns false, we have two options:
>> 
>> 1. Return -EOPNOTSUPP, which the bridge will be unable to discriminate
>>    from a non-switchdev port saying "I have no idea what you're talking
>>    about". I.e. the bridge will happily apply the config, but the
>>    hardware won't match. I don't like this, but it lines up with most
>>    other stuff.
>> 
>> 2. Return a hard error, e.g. -EINVAL/-ENOSYS. This will keep the bridge
>>    in sync with the hardware and also gives some feedback to the
>>    user. This seems like the better approach to me, but it is a new kind
>>    of paradigm.
>> 
>> What do you think?
>
> Wait, what? It matters a lot where you place the call to
> dsa_port_can_offload_mst(), too. You don't have to propagate a hard
> error code, either, at least if you make dsa_port_bridge_join() return
> -EOPNOTSUPP prior to calling switchdev_bridge_port_offload(), no?
> DSA transforms this error code into 0, and dsa_port_offloads_bridge*()
> starts returning false, which makes us ignore all MSTP related switchdev
> notifiers.

Right. So we also need:

1. A br_mst_enabled() that we can call from dsa_port_bridge_join to
   validate the initial state.

2. A switchdev attr event sent out when enabling/disabling MST on the
   bridge, so that we can NAK the change.

> The important part will be to make sure that MSTP is enabled for this
> bridge from the get-go (that being the only case in which we can offload
> an MSTP aware bridge), and refusing to offload dynamic changes to its
> MSTP state. I didn't re-check now, but I think I remember there being

Hang on though. Won't that mean that this sequence...

ip link add dev br0 type bridge \
    vlan_filtering 1 vlan_default_pvid 0 mst_enable 1
ip link set dev swp1 master br0

...will work, but offloading will be disabled on swp0; whereas this
sequence...

ip link add dev br0 type bridge \
    vlan_filtering 1 vlan_default_pvid 0
ip link set dev swp1 master br0
ip link set dev br0 type bridge mst_enable 1

...will fail on the final command? Even though they are logically
equivalent? But maybe that's just the way the cookie crumbles.

> limitations even in the software bridge related to dynamic MSTP mode
> changes anyway - there had to not be any port VLANs, which IIUC means
> that you actually need to _delete_ the port PVIDs which are automatically
> created before you could change the MSTP mode.

There are some ergonomic issues there, yes. I might look at it again and
see if there is some reasonable way of allowing the mode to be changed
even when VLANs are present.

> This is the model, what's wrong with it? I said "don't offload the
> bridge", not "don't offload specific MSTP operations".

Nothing is wrong, I just couldn't see the whole picture.

This is the way.
Vladimir Oltean March 11, 2022, 12:22 a.m. UTC | #10
On Fri, Mar 11, 2022 at 12:59:54AM +0100, Tobias Waldekranz wrote:
> On Fri, Mar 11, 2022 at 01:08, Vladimir Oltean <olteanv@gmail.com> wrote:
> > On Thu, Mar 10, 2022 at 11:46:45PM +0100, Tobias Waldekranz wrote:
> >> On Thu, Mar 10, 2022 at 18:18, Vladimir Oltean <olteanv@gmail.com> wrote:
> >> > On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
> >> >> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
> >> >> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
> >> >> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
> >> >> >> >> +		switch (state->state) {
> >> >> >> >> +		case BR_STATE_DISABLED:
> >> >> >> >> +		case BR_STATE_BLOCKING:
> >> >> >> >> +		case BR_STATE_LISTENING:
> >> >> >> >> +			/* Ideally we would only fast age entries
> >> >> >> >> +			 * belonging to VLANs controlled by this
> >> >> >> >> +			 * MST.
> >> >> >> >> +			 */
> >> >> >> >> +			dsa_port_fast_age(dp);
> >> >> >> >
> >> >> >> > Does mv88e6xxx support this? If it does, you might just as well
> >> >> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
> >> >> >> 
> >> >> >> You can limit ATU operations to a particular FID. So the way I see it we
> >> >> >> could either have:
> >> >> >> 
> >> >> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
> >> >> >> 
> >> >> >> + Maybe more generic. You could imagine there being a way to trigger
> >> >> >>   this operation from userspace for example.
> >> >> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
> >> >> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
> >> >> >> 
> >> >> >> or:
> >> >> >> 
> >> >> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
> >> >> >> 
> >> >> >> + Let's the mapping be an internal affair in the driver.
> >> >> >> - Perhaps, less generically useful.
> >> >> >> 
> >> >> >> Which one do you prefer? Or is there a hidden third option? :)
> >> >> >
> >> >> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
> >> >> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
> >> >> > retrieve this mapping from the bridge layer - maybe with something
> >> >> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
> >> >> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
> >> >> > and zeroes.
> >> >> 
> >> >> That can easily be done. Given that, should we go for port_vlan_fast_age
> >> >> instead? port_msti_fast_age feels like an awkward interface, since I
> >> >> don't think there is any hardware out there that can actually perform
> >> >> that operation without internally fanning it out over all affected VIDs
> >> >> (or FIDs in the case of mv88e6xxx).
> >> >
> >> > Yup, yup. My previous email was all over the place with regard to the
> >> > available options, because I wrote it in multiple phases so it wasn't
> >> > chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
> >> > the most sense if you can implement br_mst_get_info(). Same goes for
> >> > dsa_port_notify_bridge_fdb_flush().
> >> >
> >> >> > The reason why I asked for this is because I'm not sure of the
> >> >> > implications of flushing the entire FDB of the port for a single MSTP
> >> >> > state change. It would trigger temporary useless flooding in other MSTIs
> >> >> > at the very least. There isn't any backwards compatibility concern to
> >> >> > speak of, so we can at least try from the beginning to limit the
> >> >> > flushing to the required VLANs.
> >> >> 
> >> >> Aside from the performance implications of flows being temporarily
> >> >> flooded I don't think there are any.
> >> >> 
> >> >> I suppose if you've disabled flooding of unknown unicast on that port,
> >> >> you would loose the flow until you see some return traffic (or when one
> >> >> side gives up and ARPs). While somewhat esoteric, it would be nice to
> >> >> handle this case if the hardware supports it.
> >> >
> >> > If by "handle this case" you mean "flush only the affected VLANs", then
> >> > yes, I fully agree.
> >> >
> >> >> > What I didn't think about, and will be a problem, is
> >> >> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
> >> >> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
> >> >> > add a "vid" argument to it, and let drivers call it. Thoughts?
> >> >> 
> >> >> To me, this seems to be another argument in favor of
> >> >> port_vlan_fast_age. That way you would know the VIDs being flushed at
> >> >> the DSA layer, and driver writers needn't concern themselves with having
> >> >> to remember to generate the proper notifications back to the bridge.
> >> >
> >> > See above.
> >> >
> >> >> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
> >> >> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
> >> >> 
> >> >> What about falling back to it if the driver doesn't support per-VLAN
> >> >> flushing? Flushing all entries will work in most cases, at the cost of
> >> >> some temporary flooding. Seems more useful than refusing the offload
> >> >> completely.
> >> >
> >> > So here's what I don't understand. Do you expect a driver other than
> >> > mv88e6xxx to do something remotely reasonable under a bridge with MSTP
> >> > enabled? The idea being to handle gracefully the case where a port is
> >> > BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
> >> > just outright not offload that kind of bridge, and only concern
> >> > ourselves with what MST-capable drivers can do.
> >> 
> >> I think you're right. I was trying to make it easier for other driver
> >> writers, but it will just be more confusing and error prone.
> >> 
> >> Alright, so v3 will have something like this:
> >> 
> >> bool dsa_port_can_offload_mst(struct dsa_port *dp)
> >> {
> >> 	return ds->ops->vlan_msti_set &&
> >> 		ds->ops->port_mst_state_set &&
> >> 		ds->ops->port_vlan_fast_age &&
> >> 		dsa_port_can_configure_learning(dp);
> >> }
> >> 
> >> If this returns false, we have two options:
> >> 
> >> 1. Return -EOPNOTSUPP, which the bridge will be unable to discriminate
> >>    from a non-switchdev port saying "I have no idea what you're talking
> >>    about". I.e. the bridge will happily apply the config, but the
> >>    hardware won't match. I don't like this, but it lines up with most
> >>    other stuff.
> >> 
> >> 2. Return a hard error, e.g. -EINVAL/-ENOSYS. This will keep the bridge
> >>    in sync with the hardware and also gives some feedback to the
> >>    user. This seems like the better approach to me, but it is a new kind
> >>    of paradigm.
> >> 
> >> What do you think?
> >
> > Wait, what? It matters a lot where you place the call to
> > dsa_port_can_offload_mst(), too. You don't have to propagate a hard
> > error code, either, at least if you make dsa_port_bridge_join() return
> > -EOPNOTSUPP prior to calling switchdev_bridge_port_offload(), no?
> > DSA transforms this error code into 0, and dsa_port_offloads_bridge*()
> > starts returning false, which makes us ignore all MSTP related switchdev
> > notifiers.
> 
> Right. So we also need:
> 
> 1. A br_mst_enabled() that we can call from dsa_port_bridge_join to
>    validate the initial state.
> 
> 2. A switchdev attr event sent out when enabling/disabling MST on the
>    bridge, so that we can NAK the change.

So far, so good. This, to me, is analogous to the way in which a hypothetical
VLAN-unaware switchdev driver wouldn't deny VLAN additions or removals,
but it would only accept a VLAN-unaware bridge, and refuse to transition
into a VLAN-aware one. So even though we wouldn't deny the bridge from
keeping state that would have effect when VLAN awareness is on, we
would just deny the bridge from making that state active. Same with MSTP
awareness in my view - don't deny MSTI migrations, per-MSTI port state
changes etc, just the ability to turn on MSTP awareness.

In practice I have only seen things done the other way around - the
dpaa2-switch driver refuses VLAN-unaware bridges, so it doesn't need to
handle ignoring VLAN switchdev notifiers - a slightly simpler task.
Also, the concept of unoffloaded uppers seems to be pretty unique to DSA
so far, among switchdev drivers.

> > The important part will be to make sure that MSTP is enabled for this
> > bridge from the get-go (that being the only case in which we can offload
> > an MSTP aware bridge), and refusing to offload dynamic changes to its
> > MSTP state. I didn't re-check now, but I think I remember there being
> 
> Hang on though. Won't that mean that this sequence...
> 
> ip link add dev br0 type bridge \
>     vlan_filtering 1 vlan_default_pvid 0 mst_enable 1
> ip link set dev swp1 master br0
> 
> ...will work, but offloading will be disabled on swp0; whereas this
> sequence...
> 
> ip link add dev br0 type bridge \
>     vlan_filtering 1 vlan_default_pvid 0
> ip link set dev swp1 master br0
> ip link set dev br0 type bridge mst_enable 1
> 
> ...will fail on the final command? Even though they are logically
> equivalent? But maybe that's just the way the cookie crumbles.

Well, they could be made equivalent for academic purposes, if you're
prepared to dynamically unoffload a bridge port from the MST awareness
notifier, be my guest, I never tried it... I suppose we could try it, in
theory it's just a call to dsa_port_pre_bridge_leave() +
dsa_port_bridge_leave() after all. But it's effort to be spent in work
and testing, and I'm not sure whether anyone will see the benefit or use
case. During initial bridge join, at least it's an established code
path, the drivers which don't implement ds->ops->port_bridge_join() have
exercised it. Alvin Šipraga has fixed a few bugs related to rtl8365mb
and this after some recent rework, it should work just fine now.

> > limitations even in the software bridge related to dynamic MSTP mode
> > changes anyway - there had to not be any port VLANs, which IIUC means
> > that you actually need to _delete_ the port PVIDs which are automatically
> > created before you could change the MSTP mode.
> 
> There are some ergonomic issues there, yes. I might look at it again and
> see if there is some reasonable way of allowing the mode to be changed
> even when VLANs are present.
> 
> > This is the model, what's wrong with it? I said "don't offload the
> > bridge", not "don't offload specific MSTP operations".
> 
> Nothing is wrong, I just couldn't see the whole picture.
> 
> This is the way.
Tobias Waldekranz March 11, 2022, 9:01 a.m. UTC | #11
On Fri, Mar 11, 2022 at 02:22, Vladimir Oltean <olteanv@gmail.com> wrote:
> On Fri, Mar 11, 2022 at 12:59:54AM +0100, Tobias Waldekranz wrote:
>> On Fri, Mar 11, 2022 at 01:08, Vladimir Oltean <olteanv@gmail.com> wrote:
>> > On Thu, Mar 10, 2022 at 11:46:45PM +0100, Tobias Waldekranz wrote:
>> >> On Thu, Mar 10, 2022 at 18:18, Vladimir Oltean <olteanv@gmail.com> wrote:
>> >> > On Thu, Mar 10, 2022 at 05:05:35PM +0100, Tobias Waldekranz wrote:
>> >> >> On Thu, Mar 10, 2022 at 12:35, Vladimir Oltean <olteanv@gmail.com> wrote:
>> >> >> > On Thu, Mar 10, 2022 at 09:54:34AM +0100, Tobias Waldekranz wrote:
>> >> >> >> >> +	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
>> >> >> >> >> +		switch (state->state) {
>> >> >> >> >> +		case BR_STATE_DISABLED:
>> >> >> >> >> +		case BR_STATE_BLOCKING:
>> >> >> >> >> +		case BR_STATE_LISTENING:
>> >> >> >> >> +			/* Ideally we would only fast age entries
>> >> >> >> >> +			 * belonging to VLANs controlled by this
>> >> >> >> >> +			 * MST.
>> >> >> >> >> +			 */
>> >> >> >> >> +			dsa_port_fast_age(dp);
>> >> >> >> >
>> >> >> >> > Does mv88e6xxx support this? If it does, you might just as well
>> >> >> >> > introduce another variant of ds->ops->port_fast_age() for an msti.
>> >> >> >> 
>> >> >> >> You can limit ATU operations to a particular FID. So the way I see it we
>> >> >> >> could either have:
>> >> >> >> 
>> >> >> >> int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid)
>> >> >> >> 
>> >> >> >> + Maybe more generic. You could imagine there being a way to trigger
>> >> >> >>   this operation from userspace for example.
>> >> >> >> - We would have to keep the VLAN<->MSTI mapping in the DSA layer in
>> >> >> >>   order to be able to do the fan-out in dsa_port_set_mst_state.
>> >> >> >> 
>> >> >> >> or:
>> >> >> >> 
>> >> >> >> int (*port_msti_fast_age)(struct dsa_switch *ds, int port, u16 msti)
>> >> >> >> 
>> >> >> >> + Let's the mapping be an internal affair in the driver.
>> >> >> >> - Perhaps, less generically useful.
>> >> >> >> 
>> >> >> >> Which one do you prefer? Or is there a hidden third option? :)
>> >> >> >
>> >> >> > Yes, I was thinking of "port_msti_fast_age". I don't see a cheap way of
>> >> >> > keeping VLAN to MSTI associations in the DSA layer. Only if we could
>> >> >> > retrieve this mapping from the bridge layer - maybe with something
>> >> >> > analogous to br_vlan_get_info(), but br_mst_get_info(), and this gets
>> >> >> > passed a VLAN_N_VID sized bitmap, which the bridge populates with ones
>> >> >> > and zeroes.
>> >> >> 
>> >> >> That can easily be done. Given that, should we go for port_vlan_fast_age
>> >> >> instead? port_msti_fast_age feels like an awkward interface, since I
>> >> >> don't think there is any hardware out there that can actually perform
>> >> >> that operation without internally fanning it out over all affected VIDs
>> >> >> (or FIDs in the case of mv88e6xxx).
>> >> >
>> >> > Yup, yup. My previous email was all over the place with regard to the
>> >> > available options, because I wrote it in multiple phases so it wasn't
>> >> > chronologically ordered top-to-bottom. But port_vlan_fast_age() makes
>> >> > the most sense if you can implement br_mst_get_info(). Same goes for
>> >> > dsa_port_notify_bridge_fdb_flush().
>> >> >
>> >> >> > The reason why I asked for this is because I'm not sure of the
>> >> >> > implications of flushing the entire FDB of the port for a single MSTP
>> >> >> > state change. It would trigger temporary useless flooding in other MSTIs
>> >> >> > at the very least. There isn't any backwards compatibility concern to
>> >> >> > speak of, so we can at least try from the beginning to limit the
>> >> >> > flushing to the required VLANs.
>> >> >> 
>> >> >> Aside from the performance implications of flows being temporarily
>> >> >> flooded I don't think there are any.
>> >> >> 
>> >> >> I suppose if you've disabled flooding of unknown unicast on that port,
>> >> >> you would loose the flow until you see some return traffic (or when one
>> >> >> side gives up and ARPs). While somewhat esoteric, it would be nice to
>> >> >> handle this case if the hardware supports it.
>> >> >
>> >> > If by "handle this case" you mean "flush only the affected VLANs", then
>> >> > yes, I fully agree.
>> >> >
>> >> >> > What I didn't think about, and will be a problem, is
>> >> >> > dsa_port_notify_bridge_fdb_flush() - we don't know the vid to flush.
>> >> >> > The easy way out here would be to export dsa_port_notify_bridge_fdb_flush(),
>> >> >> > add a "vid" argument to it, and let drivers call it. Thoughts?
>> >> >> 
>> >> >> To me, this seems to be another argument in favor of
>> >> >> port_vlan_fast_age. That way you would know the VIDs being flushed at
>> >> >> the DSA layer, and driver writers needn't concern themselves with having
>> >> >> to remember to generate the proper notifications back to the bridge.
>> >> >
>> >> > See above.
>> >> >
>> >> >> > Alternatively, if you think that cross-flushing FDBs of multiple MSTIs
>> >> >> > isn't a real problem, I suppose we could keep the "port_fast_age" method.
>> >> >> 
>> >> >> What about falling back to it if the driver doesn't support per-VLAN
>> >> >> flushing? Flushing all entries will work in most cases, at the cost of
>> >> >> some temporary flooding. Seems more useful than refusing the offload
>> >> >> completely.
>> >> >
>> >> > So here's what I don't understand. Do you expect a driver other than
>> >> > mv88e6xxx to do something remotely reasonable under a bridge with MSTP
>> >> > enabled? The idea being to handle gracefully the case where a port is
>> >> > BLOCKING in an MSTI but FORWARDING in another. Because if not, let's
>> >> > just outright not offload that kind of bridge, and only concern
>> >> > ourselves with what MST-capable drivers can do.
>> >> 
>> >> I think you're right. I was trying to make it easier for other driver
>> >> writers, but it will just be more confusing and error prone.
>> >> 
>> >> Alright, so v3 will have something like this:
>> >> 
>> >> bool dsa_port_can_offload_mst(struct dsa_port *dp)
>> >> {
>> >> 	return ds->ops->vlan_msti_set &&
>> >> 		ds->ops->port_mst_state_set &&
>> >> 		ds->ops->port_vlan_fast_age &&
>> >> 		dsa_port_can_configure_learning(dp);
>> >> }
>> >> 
>> >> If this returns false, we have two options:
>> >> 
>> >> 1. Return -EOPNOTSUPP, which the bridge will be unable to discriminate
>> >>    from a non-switchdev port saying "I have no idea what you're talking
>> >>    about". I.e. the bridge will happily apply the config, but the
>> >>    hardware won't match. I don't like this, but it lines up with most
>> >>    other stuff.
>> >> 
>> >> 2. Return a hard error, e.g. -EINVAL/-ENOSYS. This will keep the bridge
>> >>    in sync with the hardware and also gives some feedback to the
>> >>    user. This seems like the better approach to me, but it is a new kind
>> >>    of paradigm.
>> >> 
>> >> What do you think?
>> >
>> > Wait, what? It matters a lot where you place the call to
>> > dsa_port_can_offload_mst(), too. You don't have to propagate a hard
>> > error code, either, at least if you make dsa_port_bridge_join() return
>> > -EOPNOTSUPP prior to calling switchdev_bridge_port_offload(), no?
>> > DSA transforms this error code into 0, and dsa_port_offloads_bridge*()
>> > starts returning false, which makes us ignore all MSTP related switchdev
>> > notifiers.
>> 
>> Right. So we also need:
>> 
>> 1. A br_mst_enabled() that we can call from dsa_port_bridge_join to
>>    validate the initial state.
>> 
>> 2. A switchdev attr event sent out when enabling/disabling MST on the
>>    bridge, so that we can NAK the change.
>
> So far, so good. This, to me, is analogous to the way in which a hypothetical
> VLAN-unaware switchdev driver wouldn't deny VLAN additions or removals,
> but it would only accept a VLAN-unaware bridge, and refuse to transition
> into a VLAN-aware one. So even though we wouldn't deny the bridge from
> keeping state that would have effect when VLAN awareness is on, we
> would just deny the bridge from making that state active. Same with MSTP
> awareness in my view - don't deny MSTI migrations, per-MSTI port state
> changes etc, just the ability to turn on MSTP awareness.
>
> In practice I have only seen things done the other way around - the
> dpaa2-switch driver refuses VLAN-unaware bridges, so it doesn't need to
> handle ignoring VLAN switchdev notifiers - a slightly simpler task.
> Also, the concept of unoffloaded uppers seems to be pretty unique to DSA
> so far, among switchdev drivers.
>
>> > The important part will be to make sure that MSTP is enabled for this
>> > bridge from the get-go (that being the only case in which we can offload
>> > an MSTP aware bridge), and refusing to offload dynamic changes to its
>> > MSTP state. I didn't re-check now, but I think I remember there being
>> 
>> Hang on though. Won't that mean that this sequence...
>> 
>> ip link add dev br0 type bridge \
>>     vlan_filtering 1 vlan_default_pvid 0 mst_enable 1
>> ip link set dev swp1 master br0
>> 
>> ...will work, but offloading will be disabled on swp0; whereas this
>> sequence...
>> 
>> ip link add dev br0 type bridge \
>>     vlan_filtering 1 vlan_default_pvid 0
>> ip link set dev swp1 master br0
>> ip link set dev br0 type bridge mst_enable 1
>> 
>> ...will fail on the final command? Even though they are logically
>> equivalent? But maybe that's just the way the cookie crumbles.
>
> Well, they could be made equivalent for academic purposes, if you're
> prepared to dynamically unoffload a bridge port from the MST awareness
> notifier, be my guest, I never tried it... I suppose we could try it, in
> theory it's just a call to dsa_port_pre_bridge_leave() +
> dsa_port_bridge_leave() after all. But it's effort to be spent in work
> and testing, and I'm not sure whether anyone will see the benefit or use
> case. During initial bridge join, at least it's an established code
> path, the drivers which don't implement ds->ops->port_bridge_join() have
> exercised it. Alvin Šipraga has fixed a few bugs related to rtl8365mb
> and this after some recent rework, it should work just fine now.

I completely agree. Just wanted to make sure that I had understood it
correctly. Thanks.

>> > limitations even in the software bridge related to dynamic MSTP mode
>> > changes anyway - there had to not be any port VLANs, which IIUC means
>> > that you actually need to _delete_ the port PVIDs which are automatically
>> > created before you could change the MSTP mode.
>> 
>> There are some ergonomic issues there, yes. I might look at it again and
>> see if there is some reasonable way of allowing the mode to be changed
>> even when VLANs are present.
>> 
>> > This is the model, what's wrong with it? I said "don't offload the
>> > bridge", not "don't offload specific MSTP operations".
>> 
>> Nothing is wrong, I just couldn't see the whole picture.
>> 
>> This is the way.
diff mbox series

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index cc8acb01bd9b..096e6e3a8e1e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -943,6 +943,8 @@  struct dsa_switch_ops {
 				     struct dsa_bridge bridge);
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
+	int	(*port_mst_state_set)(struct dsa_switch *ds, int port,
+				      const struct switchdev_mst_state *state);
 	void	(*port_fast_age)(struct dsa_switch *ds, int port);
 	int	(*port_pre_bridge_flags)(struct dsa_switch *ds, int port,
 					 struct switchdev_brport_flags flags,
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 87ec0697e92e..a620e079ebc5 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -198,6 +198,8 @@  static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
 			       const struct dsa_device_ops *tag_ops);
 int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
+int dsa_port_set_mst_state(struct dsa_port *dp,
+			   const struct switchdev_mst_state *state);
 int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 5f45cb7d70ba..26cfbc8ab499 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -108,6 +108,36 @@  int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
 	return 0;
 }
 
+int dsa_port_set_mst_state(struct dsa_port *dp,
+			   const struct switchdev_mst_state *state)
+{
+	struct dsa_switch *ds = dp->ds;
+	int err, port = dp->index;
+
+	if (!ds->ops->port_mst_state_set)
+		return -EOPNOTSUPP;
+
+	err = ds->ops->port_mst_state_set(ds, port, state);
+	if (err)
+		return err;
+
+	if (!dsa_port_can_configure_learning(dp) || dp->learning) {
+		switch (state->state) {
+		case BR_STATE_DISABLED:
+		case BR_STATE_BLOCKING:
+		case BR_STATE_LISTENING:
+			/* Ideally we would only fast age entries
+			 * belonging to VLANs controlled by this
+			 * MST.
+			 */
+			dsa_port_fast_age(dp);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
 				   bool do_fast_age)
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c6ffcd782b5a..32b006a5b778 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -288,6 +288,12 @@  static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 
 		ret = dsa_port_set_state(dp, attr->u.stp_state, true);
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
+		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+			return -EOPNOTSUPP;
+
+		ret = dsa_port_set_mst_state(dp, &attr->u.mst_state);
+		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
 		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
 			return -EOPNOTSUPP;