diff mbox series

[V3,net-next,3/4] net: dsa: mv88e6xxx: mac-auth/MAB implementation

Message ID 20220524152144.40527-4-schultz.hans+netdev@gmail.com (mailing list archive)
State New
Headers show
Series Extend locked port feature with FDB locked flag (MAC-Auth/MAB) | expand

Commit Message

Hans S May 24, 2022, 3:21 p.m. UTC
This implementation for the Marvell mv88e6xxx chip series, is
based on handling ATU miss violations occurring when packets
ingress on a port that is locked. The mac address triggering
the ATU miss violation is communicated through switchdev to
the bridge module, which adds a fdb entry with the fdb locked
flag set. The entry is kept according to the bridges ageing
time, thus simulating a dynamic entry.

Note: The locked port must have learning enabled for the ATU
miss violation to occur.

Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/Makefile            |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c              |  40 ++-
 drivers/net/dsa/mv88e6xxx/chip.h              |   5 +
 drivers/net/dsa/mv88e6xxx/global1.h           |   1 +
 drivers/net/dsa/mv88e6xxx/global1_atu.c       |  35 ++-
 .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c   | 249 ++++++++++++++++++
 .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h   |  40 +++
 drivers/net/dsa/mv88e6xxx/port.c              |  32 ++-
 drivers/net/dsa/mv88e6xxx/port.h              |   2 +
 9 files changed, 389 insertions(+), 16 deletions(-)
 create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
 create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h

Comments

kernel test robot May 24, 2022, 9:36 p.m. UTC | #1
Hi Hans,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Hans-Schultz/Extend-locked-port-feature-with-FDB-locked-flag-MAC-Auth-MAB/20220524-232455
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 677fb7525331375ba2f90f4bc94a80b9b6e697a3
config: x86_64-randconfig-a005 (https://download.01.org/0day-ci/archive/20220525/202205250511.beuDDi9L-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 10c9ecce9f6096e18222a331c5e7d085bd813f75)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/5c2d731ec7670b3eb06906c64d66c6098c588a6a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Hans-Schultz/Extend-locked-port-feature-with-FDB-locked-flag-MAC-Auth-MAB/20220524-232455
        git checkout 5c2d731ec7670b3eb06906c64d66c6098c588a6a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/dsa/mv88e6xxx/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/net/dsa/mv88e6xxx/chip.c:2754:6: warning: variable 'err' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
           if (!locked_found) {
               ^~~~~~~~~~~~~
   drivers/net/dsa/mv88e6xxx/chip.c:2759:9: note: uninitialized use occurs here
           return err;
                  ^~~
   drivers/net/dsa/mv88e6xxx/chip.c:2754:2: note: remove the 'if' if its condition is always true
           if (!locked_found) {
           ^~~~~~~~~~~~~~~~~~~
   drivers/net/dsa/mv88e6xxx/chip.c:2749:9: note: initialize the variable 'err' to silence this warning
           int err;
                  ^
                   = 0
   1 warning generated.


vim +2754 drivers/net/dsa/mv88e6xxx/chip.c

  2742	
  2743	static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
  2744					  const unsigned char *addr, u16 vid,
  2745					  struct dsa_db db)
  2746	{
  2747		struct mv88e6xxx_chip *chip = ds->priv;
  2748		bool locked_found = false;
  2749		int err;
  2750	
  2751		if (mv88e6xxx_port_is_locked(chip, port, true))
  2752			locked_found = mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
  2753	
> 2754		if (!locked_found) {
  2755			mv88e6xxx_reg_lock(chip);
  2756			err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0);
  2757			mv88e6xxx_reg_unlock(chip);
  2758		}
  2759		return err;
  2760	}
  2761
Hans S June 27, 2022, 12:58 p.m. UTC | #2
Hi Vladimir,
maybe you have missed my upstreaming of this patch set...

According to our earlier discussions I have now implemented the
feature, so that the ATU locked entries are owned by the driver, so to
make the entries dynamic I add the entries to a list and use kernel
timers to age out the entries as they should be 'dynamic'. See
mv88e6xxx_switchdev.c.

Hans

On Tue, May 24, 2022 at 5:22 PM Hans Schultz <schultz.hans@gmail.com> wrote:
>
> This implementation for the Marvell mv88e6xxx chip series, is
> based on handling ATU miss violations occurring when packets
> ingress on a port that is locked. The mac address triggering
> the ATU miss violation is communicated through switchdev to
> the bridge module, which adds a fdb entry with the fdb locked
> flag set. The entry is kept according to the bridges ageing
> time, thus simulating a dynamic entry.
>
> Note: The locked port must have learning enabled for the ATU
> miss violation to occur.
>
> Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
> ---
>  drivers/net/dsa/mv88e6xxx/Makefile            |   1 +
>  drivers/net/dsa/mv88e6xxx/chip.c              |  40 ++-
>  drivers/net/dsa/mv88e6xxx/chip.h              |   5 +
>  drivers/net/dsa/mv88e6xxx/global1.h           |   1 +
>  drivers/net/dsa/mv88e6xxx/global1_atu.c       |  35 ++-
>  .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c   | 249 ++++++++++++++++++
>  .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h   |  40 +++
>  drivers/net/dsa/mv88e6xxx/port.c              |  32 ++-
>  drivers/net/dsa/mv88e6xxx/port.h              |   2 +
>  9 files changed, 389 insertions(+), 16 deletions(-)
>  create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
>  create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h
>
Vladimir Oltean June 27, 2022, 6:05 p.m. UTC | #3
Hi Hans,

On Tue, May 24, 2022 at 05:21:43PM +0200, Hans Schultz wrote:
> This implementation for the Marvell mv88e6xxx chip series, is
> based on handling ATU miss violations occurring when packets
> ingress on a port that is locked. The mac address triggering
> the ATU miss violation is communicated through switchdev to
> the bridge module, which adds a fdb entry with the fdb locked
> flag set. The entry is kept according to the bridges ageing
> time, thus simulating a dynamic entry.
> 
> Note: The locked port must have learning enabled for the ATU
> miss violation to occur.
> 
> Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
> ---

I'm sorry that I couldn't focus on the big picture of this patch,
but locking is an absolute disaster and I just stopped after a while,
it's really distracting :)

Would you mind addressing the feedback below first, and I'll take
another look when you send v4?

>  drivers/net/dsa/mv88e6xxx/Makefile            |   1 +
>  drivers/net/dsa/mv88e6xxx/chip.c              |  40 ++-
>  drivers/net/dsa/mv88e6xxx/chip.h              |   5 +
>  drivers/net/dsa/mv88e6xxx/global1.h           |   1 +
>  drivers/net/dsa/mv88e6xxx/global1_atu.c       |  35 ++-
>  .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c   | 249 ++++++++++++++++++
>  .../net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h   |  40 +++
>  drivers/net/dsa/mv88e6xxx/port.c              |  32 ++-
>  drivers/net/dsa/mv88e6xxx/port.h              |   2 +
>  9 files changed, 389 insertions(+), 16 deletions(-)
>  create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
>  create mode 100644 drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
> index c8eca2b6f959..3ca57709730d 100644
> --- a/drivers/net/dsa/mv88e6xxx/Makefile
> +++ b/drivers/net/dsa/mv88e6xxx/Makefile
> @@ -15,3 +15,4 @@ mv88e6xxx-objs += port_hidden.o
>  mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
>  mv88e6xxx-objs += serdes.o
>  mv88e6xxx-objs += smi.o
> +mv88e6xxx-objs += mv88e6xxx_switchdev.o
> \ No newline at end of file
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 5d2c57a7c708..f7a16886bee9 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -42,6 +42,7 @@
>  #include "ptp.h"
>  #include "serdes.h"
>  #include "smi.h"
> +#include "mv88e6xxx_switchdev.h"
>  
>  static void assert_reg_lock(struct mv88e6xxx_chip *chip)
>  {
> @@ -919,6 +920,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
>  	if (err)
>  		dev_err(chip->dev,
>  			"p%d: failed to force MAC link down\n", port);
> +	else
> +		if (mv88e6xxx_port_is_locked(chip, port, true))
> +			mv88e6xxx_atu_locked_entry_flush(ds, port);

This is superfluous, is it not? The bridge will transition a port whose
link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
fast-age the dynamic FDB entries on the port, which you've already
handled below.

>  }
>  
>  static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
> @@ -1685,6 +1689,9 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
>  	struct mv88e6xxx_chip *chip = ds->priv;
>  	int err;
>  
> +	if (mv88e6xxx_port_is_locked(chip, port, true))
> +		mv88e6xxx_atu_locked_entry_flush(ds, port);
> +

Dumb question: if you only flush the locked entries at fast age if the
port is locked, then what happens with the existing locked entries if
the port becomes unlocked before an FDB flush takes place?
Shouldn't mv88e6xxx_port_set_lock() call mv88e6xxx_atu_locked_entry_flush()
too?

>  	mv88e6xxx_reg_lock(chip);
>  	err = mv88e6xxx_port_fast_age_fid(chip, port, 0);
>  	mv88e6xxx_reg_unlock(chip);
> @@ -1721,11 +1728,11 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
>  	return err;
>  }
>  
> -static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
> -			      int (*cb)(struct mv88e6xxx_chip *chip,
> -					const struct mv88e6xxx_vtu_entry *entry,
> -					void *priv),
> -			      void *priv)
> +int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
> +		       int (*cb)(struct mv88e6xxx_chip *chip,
> +				 const struct mv88e6xxx_vtu_entry *entry,
> +				 void *priv),
> +		       void *priv)
>  {
>  	struct mv88e6xxx_vtu_entry entry = {
>  		.vid = mv88e6xxx_max_vid(chip),
> @@ -2722,9 +2729,12 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
>  	struct mv88e6xxx_chip *chip = ds->priv;
>  	int err;
>  
> +	if (mv88e6xxx_port_is_locked(chip, port, true))
> +		mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
> +
>  	mv88e6xxx_reg_lock(chip);
>  	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
> -					   MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
> +			MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);

Unrelated and unjustified change.

>  	mv88e6xxx_reg_unlock(chip);
>  
>  	return err;
> @@ -2735,12 +2745,17 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
>  				  struct dsa_db db)
>  {
>  	struct mv88e6xxx_chip *chip = ds->priv;
> +	bool locked_found = false;
>  	int err;
>  
> -	mv88e6xxx_reg_lock(chip);
> -	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0);
> -	mv88e6xxx_reg_unlock(chip);
> +	if (mv88e6xxx_port_is_locked(chip, port, true))
> +		locked_found = mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
>  
> +	if (!locked_found) {
> +		mv88e6xxx_reg_lock(chip);
> +		err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0);
> +		mv88e6xxx_reg_unlock(chip);
> +	}
>  	return err;
>  }
>  
> @@ -3809,11 +3824,16 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
>  
>  static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
>  {
> -	return mv88e6xxx_setup_devlink_regions_port(ds, port);
> +	int err;
> +
> +	err = mv88e6xxx_setup_devlink_regions_port(ds, port);
> +	mv88e6xxx_init_violation_handler(ds, port);

What's with this quirky placement? You need to do error checking and
call mv88e6xxx_teardown_violation_handler() if setting up the devlink
port regions fails, otherwise the port will fail to probe but no one
will quiesce its delayed ATU work.

By the way, do all mv88e6xxx switches support 802.1X and MAC Auth Bypass,
or do we need to initialize these structures depending on some capability?

> +	return err;
>  }
>  
>  static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
>  {
> +	mv88e6xxx_teardown_violation_handler(ds, port);
>  	mv88e6xxx_teardown_devlink_regions_port(ds, port);
>  }
>  
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
> index 5e03cfe50156..c9a8404a6293 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.h
> +++ b/drivers/net/dsa/mv88e6xxx/chip.h
> @@ -803,6 +803,11 @@ static inline void mv88e6xxx_reg_unlock(struct mv88e6xxx_chip *chip)
>  	mutex_unlock(&chip->reg_lock);
>  }
>  
> +int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
> +		       int (*cb)(struct mv88e6xxx_chip *chip,
> +				 const struct mv88e6xxx_vtu_entry *entry,
> +				 void *priv),
> +		       void *priv);
>  int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *bitmap);
>  
>  #endif /* _MV88E6XXX_CHIP_H */
> diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
> index 65958b2a0d3a..503fbf216670 100644
> --- a/drivers/net/dsa/mv88e6xxx/global1.h
> +++ b/drivers/net/dsa/mv88e6xxx/global1.h
> @@ -136,6 +136,7 @@
>  #define MV88E6XXX_G1_ATU_DATA_TRUNK				0x8000
>  #define MV88E6XXX_G1_ATU_DATA_TRUNK_ID_MASK			0x00f0
>  #define MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_MASK			0x3ff0
> +#define MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_NO_EGRESS		0x0000
>  #define MV88E6XXX_G1_ATU_DATA_STATE_MASK			0x000f
>  #define MV88E6XXX_G1_ATU_DATA_STATE_UC_UNUSED			0x0000
>  #define MV88E6XXX_G1_ATU_DATA_STATE_UC_AGE_1_OLDEST		0x0001
> diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
> index 40bd67a5c8e9..517376271f64 100644
> --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
> +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
> @@ -12,6 +12,8 @@
>  
>  #include "chip.h"
>  #include "global1.h"
> +#include "port.h"
> +#include "mv88e6xxx_switchdev.h"
>  
>  /* Offset 0x01: ATU FID Register */
>  
> @@ -114,6 +116,18 @@ static int mv88e6xxx_g1_atu_op_wait(struct mv88e6xxx_chip *chip)
>  	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_ATU_OP, bit, 0);
>  }
>  
> +static int mv88e6xxx_g1_read_atu_violation(struct mv88e6xxx_chip *chip)
> +{
> +	int err;
> +
> +	err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_ATU_OP,
> +				 MV88E6XXX_G1_ATU_OP_BUSY | MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);

Split on 3 lines please.

> +	if (err)
> +		return err;
> +
> +	return mv88e6xxx_g1_atu_op_wait(chip);
> +}
> +
>  static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
>  {
>  	u16 val;
> @@ -356,11 +370,11 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
>  	int spid;
>  	int err;
>  	u16 val;
> +	u16 fid;
>  
>  	mv88e6xxx_reg_lock(chip);
>  
> -	err = mv88e6xxx_g1_atu_op(chip, 0,
> -				  MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);
> +	err = mv88e6xxx_g1_read_atu_violation(chip);

I cannot comment on the validity of this change: previously, we were
writing FID 0 as part of mv88e6xxx_g1_atu_op(), now we are reading back
the FID. Definitely too much going on in a single change, this needs a
separate patch with an explanation.

>  	if (err)
>  		goto out;
>  
> @@ -368,6 +382,10 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
>  	if (err)
>  		goto out;
>  
> +	err = mv88e6xxx_g1_read(chip, MV88E6352_G1_ATU_FID, &fid);
> +	if (err)
> +		goto out;

Is it ok to read the MV88E6352_G1_ATU_FID register from an IRQ handler
common for all switches, I wonder?

> +
>  	err = mv88e6xxx_g1_atu_data_read(chip, &entry);
>  	if (err)
>  		goto out;
> @@ -382,6 +400,11 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
>  		dev_err_ratelimited(chip->dev,
>  				    "ATU age out violation for %pM\n",
>  				    entry.mac);
> +		err = mv88e6xxx_handle_violation(chip,
> +						 chip->ports[spid].port,

Dumb question: isn't chip->ports[spid].port == spid?

> +						 &entry,
> +						 fid,
> +						 MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION);

This fits on 3 lines instead of 5 (and same below).

>  	}
>  
>  	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {
> @@ -396,6 +419,14 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
>  				    "ATU miss violation for %pM portvec %x spid %d\n",
>  				    entry.mac, entry.portvec, spid);
>  		chip->ports[spid].atu_miss_violation++;
> +		if (mv88e6xxx_port_is_locked(chip, chip->ports[spid].port, false))
> +			err = mv88e6xxx_handle_violation(chip,
> +							 chip->ports[spid].port,
> +							 &entry,
> +							 fid,
> +							 MV88E6XXX_G1_ATU_OP_MISS_VIOLATION);
> +		if (err)
> +			goto out;
>  	}
>  
>  	if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) {
> diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
> new file mode 100644
> index 000000000000..8436655ceb9a
> --- /dev/null
> +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
> @@ -0,0 +1,249 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * mv88e6xxx_switchdev.c
> + *
> + *	Authors:
> + *	Hans J. Schultz		<hans.schultz@westermo.com>
> + *
> + */
> +
> +#include <net/switchdev.h>
> +#include <linux/list.h>
> +#include "chip.h"
> +#include "global1.h"
> +#include "mv88e6xxx_switchdev.h"
> +
> +static void mv88e6xxx_atu_locked_entry_timer_work(struct atu_locked_entry *ale)

Please find a more adequate name for this function.

> +{
> +	struct switchdev_notifier_fdb_info info = {
> +		.addr = ale->mac,
> +		.vid = ale->vid,
> +		.added_by_user = false,
> +		.is_local = false,

No need to have an initializer for the false members.

> +		.offloaded = true,
> +		.locked = true,
> +	};
> +	struct mv88e6xxx_atu_entry entry;
> +	struct net_device *brport;
> +	struct dsa_port *dp;
> +
> +	entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UC_UNUSED;
> +	entry.trunk = false;
> +	memcpy(&entry.mac, &ale->mac, ETH_ALEN);

ether_addr_copy

> +
> +	mv88e6xxx_reg_lock(ale->chip);
> +	mv88e6xxx_g1_atu_loadpurge(ale->chip, ale->fid, &entry);

The portvec will be junk memory that's on stack, is that what you want?

> +	mv88e6xxx_reg_unlock(ale->chip);
> +
> +	dp = dsa_to_port(ale->chip->ds, ale->port);
> +	brport = dsa_port_to_bridge_port(dp);
> +
> +	if (brport) {
> +		if (!rtnl_is_locked()) {
> +			rtnl_lock();

No, no, no, no, no, no, no.

As I've explained already:
https://patchwork.kernel.org/project/netdevbpf/patch/20220317093902.1305816-4-schultz.hans+netdev@gmail.com/#24782974
dsa_port_to_bridge_port() needs to be called with the rtnl_mutex held.

Please take a moment to figure out which function expects which lock and
for what operation, then draw a call graph, figure out a consistent lock
hierarchy where things are always acquired in the same order, and if a
function needs a locking context but not all callers offer it, put an
ASSERT_RTNL() (for example) and transfer the locking responsibility to
the caller.

Doing this will also help you name your functions better than
"locked entry timer work" (which are called from... drum roll...
mv88e6xxx_port_fdb_del and mv88e6xxx_port_fast_age).

Which by the way, reminds me that.....
You can't take rtnl_lock() from port_fdb_add() and port_fdb_del(),
see commits d7d0d423dbaa ("net: dsa: flush switchdev workqueue when
leaving the bridge") and 0faf890fc519 ("net: dsa: drop rtnl_lock from
dsa_slave_switchdev_event_work"), as you'll deadlock with
dsa_port_pre_bridge_leave(). In fact you never could, but for a slightly
different reason.

From the discussion with Ido and Nikolay I get the impression that
you're not doing the right thing here either, notifying a
SWITCHDEV_FDB_DEL_TO_BRIDGE from what is effectively the
SWITCHDEV_FDB_DEL_TO_DEVICE handler (port_fdb_del).

> +			call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
> +						 brport, &info.info, NULL);
> +			rtnl_unlock();
> +		} else {
> +			call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
> +						 brport, &info.info, NULL);
> +		}
> +	} else {
> +		dev_err(ale->chip->dev, "ERR: No bridge port for dsa port belonging to port %d\n",
> +			ale->port);
> +	}
> +}
> +
> +static inline void mv88e6xxx_atu_locked_entry_purge(struct atu_locked_entry *ale)

No inline functions in .c files.

> +{
> +	mv88e6xxx_atu_locked_entry_timer_work(ale);
> +	del_timer(&ale->timer);
> +	list_del(&ale->list);
> +	kfree(ale);
> +}
> +
> +static void mv88e6xxx_atu_locked_entry_cleanup(struct work_struct *work)
> +{
> +	struct dsa_port *dp = container_of(work, struct dsa_port, atu_work.work);
> +	struct atu_locked_entry *ale, *tmp;
> +
> +	mutex_lock(&dp->locked_entries_list_lock);
> +	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
> +		if (ale->timed_out) {
> +			mv88e6xxx_atu_locked_entry_purge(ale);

Nasty lock ordering inversion. In mv88e6xxx_handle_violation() we take
&dp->locked_entries_list_lock with mv88e6xxx_reg_lock() held.
Here (in mv88e6xxx_atu_locked_entry_timer_work called from here) we take
mv88e6xxx_reg_lock() with &dp->locked_entries_list_lock held.

> +			atomic_dec(&dp->atu_locked_entry_cnt);
> +		}
> +	}
> +	mutex_unlock(&dp->locked_entries_list_lock);
> +
> +	mod_delayed_work(system_long_wq, &dp->atu_work, msecs_to_jiffies(100));
> +}
> +
> +static void mv88e6xxx_atu_locked_entry_timer_handler(struct timer_list *t)
> +{
> +	struct atu_locked_entry *ale = from_timer(ale, t, timer);
> +
> +	if (ale)
> +		ale->timed_out = true;
> +}
> +
> +struct mv88e6xxx_fid_search_ctx {
> +	u16 fid_search;
> +	u16 vid_found;
> +};
> +
> +static int mv88e6xxx_find_vid_on_matching_fid(struct mv88e6xxx_chip *chip,
> +					      const struct mv88e6xxx_vtu_entry *entry,
> +					      void *priv)
> +{
> +	struct mv88e6xxx_fid_search_ctx *ctx = priv;
> +
> +	if (ctx->fid_search == entry->fid) {
> +		ctx->vid_found = entry->vid;
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +int mv88e6xxx_handle_violation(struct mv88e6xxx_chip *chip,
> +			       int port,
> +			       struct mv88e6xxx_atu_entry *entry,
> +			       u16 fid,
> +			       u16 type)
> +{
> +	struct switchdev_notifier_fdb_info info = {
> +		.addr = entry->mac,
> +		.vid = 0,
> +		.added_by_user = false,
> +		.is_local = false,
> +		.offloaded = true,
> +		.locked = true,
> +	};
> +	struct atu_locked_entry *locked_entry;
> +	struct mv88e6xxx_fid_search_ctx ctx;
> +	struct net_device *brport;
> +	struct dsa_port *dp;
> +	int err;
> +
> +	ctx.fid_search = fid;
> +	err = mv88e6xxx_vtu_walk(chip, mv88e6xxx_find_vid_on_matching_fid, &ctx);
> +	if (err < 0)
> +		return err;
> +	if (err == 1)
> +		info.vid = ctx.vid_found;
> +	else
> +		return -ENODATA;
> +
> +	dp = dsa_to_port(chip->ds, port);
> +	brport = dsa_port_to_bridge_port(dp);
> +
> +	if (!brport)
> +		return -ENODEV;
> +
> +	switch (type) {
> +	case MV88E6XXX_G1_ATU_OP_MISS_VIOLATION:
> +		if (atomic_read(&dp->atu_locked_entry_cnt) >= ATU_LOCKED_ENTRIES_MAX) {
> +			mv88e6xxx_reg_unlock(chip);

You call mv88e6xxx_reg_lock() from mv88e6xxx_g1_atu_prob_irq_thread_fn()
and mv88e6xxx_reg_unlock() from mv88e6xxx_handle_violation()? Nice!

And I understand why that is: to avoid a lock ordering inversion with
rtnl_lock(). Just unlock the mv88e6xxx registers after the last hardware
access in mv88e6xxx_g1_atu_prob_irq_thread_fn() - after mv88e6xxx_g1_atu_mac_read(),
and call mv88e6xxx_handle_violation() with the registers unlocked, and
lock them when you need them.

> +			return 0;
> +		}
> +		entry->portvec = MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_NO_EGRESS;
> +		entry->state = MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC;
> +		entry->trunk = false;
> +		err = mv88e6xxx_g1_atu_loadpurge(chip, fid, entry);
> +		if (err)
> +			goto fail;
> +
> +		locked_entry = kmalloc(sizeof(*locked_entry), GFP_ATOMIC);

Please be consistent in your naming of struct atu_locked_entry
variables, be they "locked_entry" or "ale" or otherwise.
And please create a helper function that creates such a structure and
initializes it.

> +		if (!locked_entry)
> +			return -ENOMEM;
> +		timer_setup(&locked_entry->timer, mv88e6xxx_atu_locked_entry_timer_handler, 0);

Does this have to be a dedicated timer per entry, or can you just record
the "jiffies" at creation time per locked entry, and compare it with the
current jiffies from the periodic, sleepable mv88e6xxx_atu_locked_entry_cleanup?

> +		locked_entry->timer.expires = jiffies + dp->ageing_time / 10;
> +		locked_entry->chip = chip;
> +		locked_entry->port = port;
> +		locked_entry->fid = fid;
> +		locked_entry->vid = info.vid;
> +		locked_entry->timed_out = false;
> +		memcpy(&locked_entry->mac, entry->mac, ETH_ALEN);
> +
> +		mutex_lock(&dp->locked_entries_list_lock);
> +		add_timer(&locked_entry->timer);
> +		list_add(&locked_entry->list, &dp->atu_locked_entries_list);
> +		mutex_unlock(&dp->locked_entries_list_lock);
> +		atomic_inc(&dp->atu_locked_entry_cnt);
> +
> +		mv88e6xxx_reg_unlock(chip);
> +
> +		rtnl_lock();
> +		err = call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
> +					       brport, &info.info, NULL);
> +		break;
> +	}
> +	rtnl_unlock();

Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().

> +
> +	return err;
> +
> +fail:
> +	mv88e6xxx_reg_unlock(chip);
> +	return err;
> +}
> +
> +bool mv88e6xxx_atu_locked_entry_find_purge(struct dsa_switch *ds, int port,
> +					   const unsigned char *addr, u16 vid)
> +{
> +	struct dsa_port *dp = dsa_to_port(ds, port);
> +	struct atu_locked_entry *ale, *tmp;
> +	bool found = false;
> +
> +	mutex_lock(&dp->locked_entries_list_lock);
> +	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
> +		if (!memcmp(&ale->mac, addr, ETH_ALEN)) {
> +			if (ale->vid == vid) {
> +				mv88e6xxx_atu_locked_entry_purge(ale);
> +				atomic_dec(&dp->atu_locked_entry_cnt);
> +				found = true;
> +				break;
> +			}
> +		}
> +	}
> +	mutex_unlock(&dp->locked_entries_list_lock);
> +	return found;
> +}
> +
> +void mv88e6xxx_atu_locked_entry_flush(struct dsa_switch *ds, int port)
> +{
> +	struct dsa_port *dp = dsa_to_port(ds, port);
> +	struct atu_locked_entry *ale, *tmp;
> +
> +	mutex_lock(&dp->locked_entries_list_lock);
> +	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
> +		mv88e6xxx_atu_locked_entry_purge(ale);
> +		atomic_dec(&dp->atu_locked_entry_cnt);
> +	}
> +	mutex_unlock(&dp->locked_entries_list_lock);
> +
> +	if (atomic_read(&dp->atu_locked_entry_cnt) != 0)
> +		dev_err(ds->dev,
> +			"ERROR: Locked entries count is not zero after flush on port %d\n",
> +			port);

And generally speaking, why would you expect it to be 0, since there's
nothing that stops this check from racing with mv88e6xxx_handle_violation?
I suspect that if locking is properly thought through, the atu_locked_entry_cnt
can just be a plain int instead of an improperly used atomic.

Also, random fact: no need to say ERROR when printing with the KERN_ERR
log level. It's kind of implied from the log level.

> +}
> +
> +void mv88e6xxx_init_violation_handler(struct dsa_switch *ds, int port)
> +{
> +	struct dsa_port *dp = dsa_to_port(ds, port);
> +
> +	INIT_LIST_HEAD(&dp->atu_locked_entries_list);
> +	mutex_init(&dp->locked_entries_list_lock);
> +	dp->atu_locked_entry_cnt.counter = 0;

atomic_set()

> +	INIT_DELAYED_WORK(&dp->atu_work, mv88e6xxx_atu_locked_entry_cleanup);
> +	mod_delayed_work(system_long_wq, &dp->atu_work, msecs_to_jiffies(100));
> +}
> +
> +void mv88e6xxx_teardown_violation_handler(struct dsa_switch *ds, int port)
> +{
> +	struct dsa_port *dp = dsa_to_port(ds, port);
> +
> +	cancel_delayed_work(&dp->atu_work);
> +	mv88e6xxx_atu_locked_entry_flush(ds, port);
> +	mutex_destroy(&dp->locked_entries_list_lock);
> +}
> diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h

This and mv88e6xxx_switchdev.c are the only source files belonging to
this driver which have the mv88e6xxx_ prefix (others are "chip.c" etc).
Can you please follow the convention?

> new file mode 100644
> index 000000000000..f0e7abf7c361
> --- /dev/null
> +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * mv88e6xxx_switchdev.h
> + *
> + *	Authors:
> + *	Hans J. Schultz		<hans.schultz@westermo.com>
> + *
> + */
> +
> +#ifndef DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_
> +#define DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_
> +
> +#include <net/switchdev.h>
> +#include "chip.h"
> +
> +#define ATU_LOCKED_ENTRIES_MAX	64
> +
> +struct atu_locked_entry {

mv88e6xxx driver specific structure names should be prefixed with mv88e6xxx_.

> +	struct list_head list;
> +	struct mv88e6xxx_chip *chip;
> +	int port;
> +	u8	mac[ETH_ALEN];

Either align everything with tabs, or nothing.

> +	u16 fid;
> +	u16 vid;
> +	struct timer_list timer;
> +	bool timed_out;
> +};
> +
> +int mv88e6xxx_handle_violation(struct mv88e6xxx_chip *chip,
> +			       int port,
> +			       struct mv88e6xxx_atu_entry *entry,
> +			       u16 fid,
> +			       u16 type);

Both this and the function definition can easily fit on 3 lines.

> +bool mv88e6xxx_atu_locked_entry_find_purge(struct dsa_switch *ds, int port,
> +					   const unsigned char *addr, u16 vid);
> +void mv88e6xxx_atu_locked_entry_flush(struct dsa_switch *ds, int port);
> +void mv88e6xxx_init_violation_handler(struct dsa_switch *ds, int port);
> +void mv88e6xxx_teardown_violation_handler(struct dsa_switch *ds, int port);
> +
> +#endif /* DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_ */
> diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
> index 795b3128768f..c4e5e7174129 100644
> --- a/drivers/net/dsa/mv88e6xxx/port.c
> +++ b/drivers/net/dsa/mv88e6xxx/port.c
> @@ -14,9 +14,11 @@
>  #include <linux/phylink.h>
>  
>  #include "chip.h"
> +#include "global1.h"
>  #include "global2.h"
>  #include "port.h"
>  #include "serdes.h"
> +#include "mv88e6xxx_switchdev.h"
>  
>  int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
>  			u16 *val)
> @@ -1239,6 +1241,25 @@ int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port,
>  	return err;
>  }
>  
> +bool mv88e6xxx_port_is_locked(struct mv88e6xxx_chip *chip, int port, bool chiplock)
> +{
> +	bool locked = false;
> +	u16 reg;
> +
> +	if (chiplock)
> +		mv88e6xxx_reg_lock(chip);

Please, no "if (chiplock) mutex_lock()" hacks. Just lockdep_assert_held(&chip->reg_lock),
which serves both for documentation and for validation purposes, ensure
the lock is always taken at the caller (which in this case is super easy)
and move on.

> +
> +	if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
> +		goto out;

It would be good to actually propagate the error to the caller and
"locked" via a pass-by-reference bool pointer argument, not just say
that I/O errors mean that the port is unlocked.

> +	locked = reg & MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_LOCK;
> +
> +out:
> +	if (chiplock)
> +		mv88e6xxx_reg_unlock(chip);
> +
> +	return locked;
> +}
> +
>  int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
>  			    bool locked)
>  {
> @@ -1261,10 +1282,13 @@ int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
>  	if (err)
>  		return err;
>  
> -	reg &= ~MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT;
> -	if (locked)
> -		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT;
> -
> +	reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
> +	if (locked) {
> +		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> +			MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> +			MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> +			MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;

I'd suggest aligning these macros vertically.

> +	}
>  	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR, reg);
>  }
>  
> diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
> index e0a705d82019..d377abd6ab17 100644
> --- a/drivers/net/dsa/mv88e6xxx/port.h
> +++ b/drivers/net/dsa/mv88e6xxx/port.h
> @@ -231,6 +231,7 @@
>  #define MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT		0x2000
>  #define MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG	0x1000
>  #define MV88E6XXX_PORT_ASSOC_VECTOR_REFRESH_LOCKED	0x0800
> +#define MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK		0x07ff
>  
>  /* Offset 0x0C: Port ATU Control */
>  #define MV88E6XXX_PORT_ATU_CTL		0x0c
> @@ -374,6 +375,7 @@ int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid);
>  int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid);
>  int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid);
>  
> +bool mv88e6xxx_port_is_locked(struct mv88e6xxx_chip *chip, int port, bool chiplock);
>  int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
>  			    bool locked);
>  
> -- 
> 2.30.2
>
Hans S June 28, 2022, 12:26 p.m. UTC | #4
On Mon, Jun 27, 2022 at 8:06 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> Hi Hans,
>
> On Tue, May 24, 2022 at 05:21:43PM +0200, Hans Schultz wrote:
> > This implementation for the Marvell mv88e6xxx chip series, is
> > based on handling ATU miss violations occurring when packets
> > ingress on a port that is locked. The mac address triggering
> > the ATU miss violation is communicated through switchdev to
> > the bridge module, which adds a fdb entry with the fdb locked
> > flag set. The entry is kept according to the bridges ageing
> > time, thus simulating a dynamic entry.
> >
> > Note: The locked port must have learning enabled for the ATU
> > miss violation to occur.
> >
> > Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
> > ---
>
> I'm sorry that I couldn't focus on the big picture of this patch,
> but locking is an absolute disaster and I just stopped after a while,
> it's really distracting :)

The code works, but I think that we should "undisaster" it. :)

>
> Would you mind addressing the feedback below first, and I'll take
> another look when you send v4?

fine :)

> >       if (err)
> >               dev_err(chip->dev,
> >                       "p%d: failed to force MAC link down\n", port);
> > +     else
> > +             if (mv88e6xxx_port_is_locked(chip, port, true))
> > +                     mv88e6xxx_atu_locked_entry_flush(ds, port);
>
> This is superfluous, is it not? The bridge will transition a port whose
> link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
> fast-age the dynamic FDB entries on the port, which you've already
> handled below.

I guess you are right.


> >  }
> >
> >  static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
> > @@ -1685,6 +1689,9 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
> >       struct mv88e6xxx_chip *chip = ds->priv;
> >       int err;
> >
> > +     if (mv88e6xxx_port_is_locked(chip, port, true))
> > +             mv88e6xxx_atu_locked_entry_flush(ds, port);
> > +
>
> Dumb question: if you only flush the locked entries at fast age if the
> port is locked, then what happens with the existing locked entries if
> the port becomes unlocked before an FDB flush takes place?
> Shouldn't mv88e6xxx_port_set_lock() call mv88e6xxx_atu_locked_entry_flush()
> too?

That was my first thought too, but the way the flags are handled with
the mask etc, does so that
mv88e6xxx_port_set_lock() is called when other flags change. It could
be done by the transition
from locked->unlocked by checking if the port is locked already.
On the other hand, the timers will timeout and the entries will be
removed anyhow.

> > +     if (mv88e6xxx_port_is_locked(chip, port, true))
> > +             mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
> > +
> >       mv88e6xxx_reg_lock(chip);
> >       err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
> > -                                        MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
> > +                     MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
>
> Unrelated and unjustified change.
>
Ups, missed that one.

> >  static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
> >  {
> > -     return mv88e6xxx_setup_devlink_regions_port(ds, port);
> > +     int err;
> > +
> > +     err = mv88e6xxx_setup_devlink_regions_port(ds, port);
> > +     mv88e6xxx_init_violation_handler(ds, port);
>
> What's with this quirky placement? You need to do error checking and
> call mv88e6xxx_teardown_violation_handler() if setting up the devlink
> port regions fails, otherwise the port will fail to probe but no one
> will quiesce its delayed ATU work.

Yes, of course.

> By the way, do all mv88e6xxx switches support 802.1X and MAC Auth Bypass,
> or do we need to initialize these structures depending on some capability?

I will have to look into that, but I think they all do support these features.

> > +     err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_ATU_OP,
> > +                              MV88E6XXX_G1_ATU_OP_BUSY | MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);
>
> Split on 3 lines please.

OK.

> > +     if (err)
> > +             return err;
> > +
> > +     return mv88e6xxx_g1_atu_op_wait(chip);
> > +}
> > +
> >  static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
> >  {
> >       u16 val;
> > @@ -356,11 +370,11 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
> >       int spid;
> >       int err;
> >       u16 val;
> > +     u16 fid;
> >
> >       mv88e6xxx_reg_lock(chip);
> >
> > -     err = mv88e6xxx_g1_atu_op(chip, 0,
> > -                               MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);
> > +     err = mv88e6xxx_g1_read_atu_violation(chip);
>
> I cannot comment on the validity of this change: previously, we were
> writing FID 0 as part of mv88e6xxx_g1_atu_op(), now we are reading back
> the FID. Definitely too much going on in a single change, this needs a
> separate patch with an explanation.

It is of course needed to read the fid and I couldn't really
understand the reasoning behind how it was before,
but I will do as you say as best I can.

>
> >       if (err)
> >               goto out;
> >
> > @@ -368,6 +382,10 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
> >       if (err)
> >               goto out;
> >
> > +     err = mv88e6xxx_g1_read(chip, MV88E6352_G1_ATU_FID, &fid);
> > +     if (err)
> > +             goto out;
>
> Is it ok to read the MV88E6352_G1_ATU_FID register from an IRQ handler
> common for all switches, I wonder?

I don't know about the naming of this define (I probably overlooked
the 6352 part), but it is the same as I have in the
spec for 6097, and I don't see any alternative...

> > +
> >       err = mv88e6xxx_g1_atu_data_read(chip, &entry);
> >       if (err)
> >               goto out;
> > @@ -382,6 +400,11 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
> >               dev_err_ratelimited(chip->dev,
> >                                   "ATU age out violation for %pM\n",
> >                                   entry.mac);
> > +             err = mv88e6xxx_handle_violation(chip,
> > +                                              chip->ports[spid].port,
>
> Dumb question: isn't chip->ports[spid].port == spid?

Probably you are right.

>
> > +                                              &entry,
> > +                                              fid,
> > +                                              MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION);
>
> This fits on 3 lines instead of 5 (and same below).

OK

>
> > +static void mv88e6xxx_atu_locked_entry_timer_work(struct atu_locked_entry *ale)
>
> Please find a more adequate name for this function.

Any suggestions?

>
> > +{
> > +     struct switchdev_notifier_fdb_info info = {
> > +             .addr = ale->mac,
> > +             .vid = ale->vid,
> > +             .added_by_user = false,
> > +             .is_local = false,
>
> No need to have an initializer for the false members.

OK

> > +             .offloaded = true,
> > +             .locked = true,
> > +     };
> > +     struct mv88e6xxx_atu_entry entry;
> > +     struct net_device *brport;
> > +     struct dsa_port *dp;
> > +
> > +     entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UC_UNUSED;
> > +     entry.trunk = false;
> > +     memcpy(&entry.mac, &ale->mac, ETH_ALEN);
>
> ether_addr_copy
>
> > +
> > +     mv88e6xxx_reg_lock(ale->chip);
> > +     mv88e6xxx_g1_atu_loadpurge(ale->chip, ale->fid, &entry);
>
> The portvec will be junk memory that's on stack, is that what you want?
>
Probably not what I want.

> > +     if (brport) {
> > +             if (!rtnl_is_locked()) {
> > +                     rtnl_lock();
>
> No, no, no, no, no, no, no.
>
> As I've explained already:
> https://patchwork.kernel.org/project/netdevbpf/patch/20220317093902.1305816-4-schultz.hans+netdev@gmail.com/#24782974
> dsa_port_to_bridge_port() needs to be called with the rtnl_mutex held.
>
> Please take a moment to figure out which function expects which lock and
> for what operation, then draw a call graph, figure out a consistent lock
> hierarchy where things are always acquired in the same order, and if a
> function needs a locking context but not all callers offer it, put an
> ASSERT_RTNL() (for example) and transfer the locking responsibility to
> the caller.

As I remember it was because mv88e6xxx_atu_locked_entry_flush() was called both
with and without the lock, but there was something I didn't know about
how link down
handling works.

>
> Doing this will also help you name your functions better than
> "locked entry timer work" (which are called from... drum roll...
> mv88e6xxx_port_fdb_del and mv88e6xxx_port_fast_age).
>
> Which by the way, reminds me that.....
> You can't take rtnl_lock() from port_fdb_add() and port_fdb_del(),
> see commits d7d0d423dbaa ("net: dsa: flush switchdev workqueue when
> leaving the bridge") and 0faf890fc519 ("net: dsa: drop rtnl_lock from
> dsa_slave_switchdev_event_work"), as you'll deadlock with
> dsa_port_pre_bridge_leave(). In fact you never could, but for a slightly
> different reason.
>
> From the discussion with Ido and Nikolay I get the impression that
> you're not doing the right thing here either, notifying a
> SWITCHDEV_FDB_DEL_TO_BRIDGE from what is effectively the
> SWITCHDEV_FDB_DEL_TO_DEVICE handler (port_fdb_del).

Hmm, my experience tells me that much is opposite the normal
conventions when dealing with
locked ports, as there was never switchdev notifications from the
driver to the bridge before, but
that is needed to keep ATU and FDB entries in sync.

>
> No inline functions in .c files.

OK

> Nasty lock ordering inversion. In mv88e6xxx_handle_violation() we take
> &dp->locked_entries_list_lock with mv88e6xxx_reg_lock() held.
> Here (in mv88e6xxx_atu_locked_entry_timer_work called from here) we take
> mv88e6xxx_reg_lock() with &dp->locked_entries_list_lock held.
>
I will look into that.

> > +     switch (type) {
> > +     case MV88E6XXX_G1_ATU_OP_MISS_VIOLATION:
> > +             if (atomic_read(&dp->atu_locked_entry_cnt) >= ATU_LOCKED_ENTRIES_MAX) {
> > +                     mv88e6xxx_reg_unlock(chip);
>
> You call mv88e6xxx_reg_lock() from mv88e6xxx_g1_atu_prob_irq_thread_fn()
> and mv88e6xxx_reg_unlock() from mv88e6xxx_handle_violation()? Nice!
>
> And I understand why that is: to avoid a lock ordering inversion with
> rtnl_lock(). Just unlock the mv88e6xxx registers after the last hardware
> access in mv88e6xxx_g1_atu_prob_irq_thread_fn() - after mv88e6xxx_g1_atu_mac_read(),
> and call mv88e6xxx_handle_violation() with the registers unlocked, and
> lock them when you need them.

OK.

> > +             locked_entry = kmalloc(sizeof(*locked_entry), GFP_ATOMIC);
>
> Please be consistent in your naming of struct atu_locked_entry
> variables, be they "locked_entry" or "ale" or otherwise.
> And please create a helper function that creates such a structure and
> initializes it.

OK

> > +             if (!locked_entry)
> > +                     return -ENOMEM;
> > +             timer_setup(&locked_entry->timer, mv88e6xxx_atu_locked_entry_timer_handler, 0);
>
> Does this have to be a dedicated timer per entry, or can you just record
> the "jiffies" at creation time per locked entry, and compare it with the
> current jiffies from the periodic, sleepable mv88e6xxx_atu_locked_entry_cleanup?

I think that approach should be sufficient too.

>
> Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
> Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().

Just a small optimization as I also have another case of the switch
(only one switch case if
you didn't notice) belonging to the next patch set regarding dynamic
ATU entries.

> > +void mv88e6xxx_atu_locked_entry_flush(struct dsa_switch *ds, int port)
> > +{
> > +     struct dsa_port *dp = dsa_to_port(ds, port);
> > +     struct atu_locked_entry *ale, *tmp;
> > +
> > +     mutex_lock(&dp->locked_entries_list_lock);
> > +     list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
> > +             mv88e6xxx_atu_locked_entry_purge(ale);
> > +             atomic_dec(&dp->atu_locked_entry_cnt);
> > +     }
> > +     mutex_unlock(&dp->locked_entries_list_lock);
> > +
> > +     if (atomic_read(&dp->atu_locked_entry_cnt) != 0)
> > +             dev_err(ds->dev,
> > +                     "ERROR: Locked entries count is not zero after flush on port %d\n",
> > +                     port);
>
> And generally speaking, why would you expect it to be 0, since there's
> nothing that stops this check from racing with mv88e6xxx_handle_violation?

I guess you are right that when setting the port STP state to BLOCKED, there is
the potential race you mention.

> Also, random fact: no need to say ERROR when printing with the KERN_ERR
> log level. It's kind of implied from the log level.

Of course.

> > +     dp->atu_locked_entry_cnt.counter = 0;
>
> atomic_set()

Right!

> This and mv88e6xxx_switchdev.c are the only source files belonging to
> this driver which have the mv88e6xxx_ prefix (others are "chip.c" etc).
> Can you please follow the convention?

Yes. I think I got that idea from some other driver, thus avoiding
switchdev.h file,
but I will change it.

> > +struct atu_locked_entry {
>
> mv88e6xxx driver specific structure names should be prefixed with mv88e6xxx_.

OK

> > +     u8      mac[ETH_ALEN];
>
> Either align everything with tabs, or nothing.

Ups.

> > +int mv88e6xxx_handle_violation(struct mv88e6xxx_chip *chip,
> > +                            int port,
> > +                            struct mv88e6xxx_atu_entry *entry,
> > +                            u16 fid,
> > +                            u16 type);
>
> Both this and the function definition can easily fit on 3 lines.

OK

> Please, no "if (chiplock) mutex_lock()" hacks. Just lockdep_assert_held(&chip->reg_lock),
> which serves both for documentation and for validation purposes, ensure
> the lock is always taken at the caller (which in this case is super easy)
> and move on.

As I am calling the function in if statement checks, it would make
that code more messy, while with
this approach the function can be called from anywhere. I also looked
at having two functions, with
one being a wrapper function taking the lock and calling the other...

>
> > +
> > +     if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
> > +             goto out;
>
> It would be good to actually propagate the error to the caller and
> "locked" via a pass-by-reference bool pointer argument, not just say
> that I/O errors mean that the port is unlocked.

Again the wish to be able to call it from if statement checks,.

> > +     reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
> > +     if (locked) {
> > +             reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> > +                     MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> > +                     MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> > +                     MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
>
> I'd suggest aligning these macros vertically.

They are according to the Linux kernel coding standard wrt indentation afaik.
Hans S July 5, 2022, 3:05 p.m. UTC | #5
Hi, does anybody know what it going on with this variable?
struct dsa_port *dp ->ageing_time;

I experience that it changes value like a factor ~10 at times.
Vladimir Oltean July 6, 2022, 8:55 a.m. UTC | #6
On Tue, Jun 28, 2022 at 02:26:43PM +0200, Hans S wrote:
> > Dumb question: if you only flush the locked entries at fast age if the
> > port is locked, then what happens with the existing locked entries if
> > the port becomes unlocked before an FDB flush takes place?
> > Shouldn't mv88e6xxx_port_set_lock() call mv88e6xxx_atu_locked_entry_flush()
> > too?
> 
> That was my first thought too, but the way the flags are handled with the mask etc, does so that
> mv88e6xxx_port_set_lock() is called when other flags change. It could be done by the transition
> from locked->unlocked by checking if the port is locked already.

Why does mv88e6xxx_port_set_lock() get called when other flags change?

> On the other hand, the timers will timeout and the entries will be removed anyhow.

> > > +static void mv88e6xxx_atu_locked_entry_timer_work(struct atu_locked_entry *ale)
> >
> > Please find a more adequate name for this function.
> 
> Any suggestions?

Not sure. It depends on whether you leave just the logic to delete a
locked ATU entry, or also the switchdev FDB_DEL_TO_BRIDGE notifier.
In any case, pick a name that reflects what it does. Something with
locked_entry_delete() can't be too wrong.

> > From the discussion with Ido and Nikolay I get the impression that
> > you're not doing the right thing here either, notifying a
> > SWITCHDEV_FDB_DEL_TO_BRIDGE from what is effectively the
> > SWITCHDEV_FDB_DEL_TO_DEVICE handler (port_fdb_del).
> 
> Hmm, my experience tells me that much is opposite the normal
> conventions when dealing with
> locked ports, as there was never switchdev notifications from the
> driver to the bridge before, but
> that is needed to keep ATU and FDB entries in sync.

On delete you mean? So the bridge signals switchdev a deletion of a
locked FDB entry (as I pointed out, this function gets indirectly called
from port_fdb_del), but it won't get deleted until switchdev signals it
back, is what you're saying?

> > Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
> > Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().
> 
> Just a small optimization as I also have another case of the switch
> (only one switch case if
> you didn't notice) belonging to the next patch set regarding dynamic
> ATU entries.

What kind of optimization are you even talking about? Please get rid of
coding patterns like this, sorry.

> > Please, no "if (chiplock) mutex_lock()" hacks. Just lockdep_assert_held(&chip->reg_lock),
> > which serves both for documentation and for validation purposes, ensure
> > the lock is always taken at the caller (which in this case is super easy)
> > and move on.
> 
> As I am calling the function in if statement checks, it would make
> that code more messy, while with
> this approach the function can be called from anywhere. I also looked
> at having two functions, with
> one being a wrapper function taking the lock and calling the other...

There are many functions in mv88e6xxx that require the reg_lock to be
held, there's nothing new or special here.

> >
> > > +
> > > +     if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
> > > +             goto out;
> >
> > It would be good to actually propagate the error to the caller and
> > "locked" via a pass-by-reference bool pointer argument, not just say
> > that I/O errors mean that the port is unlocked.
> 
> Again the wish to be able to call it from if statement checks,.
> 
> > > +     reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
> > > +     if (locked) {
> > > +             reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
> >
> > I'd suggest aligning these macros vertically.
> 
> They are according to the Linux kernel coding standard wrt indentation afaik.

Compare:

		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
			MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
			MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
			MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;

with:

		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
		       MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
		       MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
		       MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
Hans S July 6, 2022, 10:12 a.m. UTC | #7
On Wed, Jul 6, 2022 at 10:56 AM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 02:26:43PM +0200, Hans S wrote:
> > > Dumb question: if you only flush the locked entries at fast age if the
> > > port is locked, then what happens with the existing locked entries if
> > > the port becomes unlocked before an FDB flush takes place?
> > > Shouldn't mv88e6xxx_port_set_lock() call mv88e6xxx_atu_locked_entry_flush()
> > > too?

BTW:
>> @@ -919,6 +920,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
>>       if (err)
>>               dev_err(chip->dev,
>>                       "p%d: failed to force MAC link down\n", port);
>> +     else
>> +             if (mv88e6xxx_port_is_locked(chip, port, true))
>> +                     mv88e6xxx_atu_locked_entry_flush(ds, port);
>
>This is superfluous, is it not? The bridge will transition a port whose
>link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
>fast-age the dynamic FDB entries on the port, which you've already
>handled below.

I removed this code, but then on link down the locked entries were not
cleared out. Something not as thought?

> >
> > That was my first thought too, but the way the flags are handled with the mask etc, does so that
> > mv88e6xxx_port_set_lock() is called when other flags change. It could be done by the transition
> > from locked->unlocked by checking if the port is locked already.
>
> Why does mv88e6xxx_port_set_lock() get called when other flags change?

That is what seems to happen, but maybe I am wrong. Looking at the dsa
functions dsa_port_inherit_brport_flags() and
dsa_port_clear_brport_flags(), they set the mask for which underlying
function is called, and it looks to me that they call once for all the
flags: BR_LEARNING, BR_FLOOD, BR_MCAST_FLOOD, BR_BCAST_FLOOD,
BR_PORT_LOCKED?

>
> > On the other hand, the timers will timeout and the entries will be removed anyhow.
>
> > > > +static void mv88e6xxx_atu_locked_entry_timer_work(struct atu_locked_entry *ale)
> > >
> > > Please find a more adequate name for this function.
> >
> > Any suggestions?
>
> Not sure. It depends on whether you leave just the logic to delete a
> locked ATU entry, or also the switchdev FDB_DEL_TO_BRIDGE notifier.
> In any case, pick a name that reflects what it does. Something with
> locked_entry_delete() can't be too wrong.
>
> > > From the discussion with Ido and Nikolay I get the impression that
> > > you're not doing the right thing here either, notifying a
> > > SWITCHDEV_FDB_DEL_TO_BRIDGE from what is effectively the
> > > SWITCHDEV_FDB_DEL_TO_DEVICE handler (port_fdb_del).
> >
> > Hmm, my experience tells me that much is opposite the normal
> > conventions when dealing with
> > locked ports, as there was never switchdev notifications from the
> > driver to the bridge before, but
> > that is needed to keep ATU and FDB entries in sync.
>
> On delete you mean? So the bridge signals switchdev a deletion of a
> locked FDB entry (as I pointed out, this function gets indirectly called
> from port_fdb_del), but it won't get deleted until switchdev signals it
> back, is what you're saying?
>

When added they are added with bridge FDB flags: extern_learn offload
locked, with a SWITCHDEV_FDB_ADD_TO_BRIDGE event. So they are owned by
the driver.
When the driver deletes the locked entry the bridge FDB entry is
removes by the SWITCHDEV_FDB_DEL_TO_BRIDGE event from the driver. That
seems quite fair?

> > > Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
> > > Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().
> >
> > Just a small optimization as I also have another case of the switch
> > (only one switch case if
> > you didn't notice) belonging to the next patch set regarding dynamic
> > ATU entries.
>
> What kind of optimization are you even talking about? Please get rid of
> coding patterns like this, sorry.
>
Right!

> > > Please, no "if (chiplock) mutex_lock()" hacks. Just lockdep_assert_held(&chip->reg_lock),
> > > which serves both for documentation and for validation purposes, ensure
> > > the lock is always taken at the caller (which in this case is super easy)
> > > and move on.
> >
> > As I am calling the function in if statement checks, it would make
> > that code more messy, while with
> > this approach the function can be called from anywhere. I also looked
> > at having two functions, with
> > one being a wrapper function taking the lock and calling the other...
>
> There are many functions in mv88e6xxx that require the reg_lock to be
> held, there's nothing new or special here.
>

Now I take the lock in the function regardless. No boolean.

> > >
> > > > +
> > > > +     if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
> > > > +             goto out;
> > >
> > > It would be good to actually propagate the error to the caller and
> > > "locked" via a pass-by-reference bool pointer argument, not just say
> > > that I/O errors mean that the port is unlocked.
> >
> > Again the wish to be able to call it from if statement checks,.
> >
> > > > +     reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
> > > > +     if (locked) {
> > > > +             reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
> > >
> > > I'd suggest aligning these macros vertically.
> >
> > They are according to the Linux kernel coding standard wrt indentation afaik.
>
> Compare:
>
>                 reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
>                         MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
>                         MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
>                         MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
>
> with:
>
>                 reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
>                        MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
>                        MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
>                        MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;

I cannot see the difference here...?

Another issue...

I have removed the timers as they are superfluous and now just use the
worker and jiffies. But I have found that the whole ageing time seems
to be broken on the 5.17 kernel I am running. I don't know if it has
been fixed, but the ageing timeout is supposed to be given in seconds.
Here is the output from various functions after the command "ip link
set dev br0 type bridge ageing_time 1500" (that is nominally 1500
seconds according to man page!):

dsa_switch_ageing_time: ageing_time 10000, ageing_time_min 1000,
ageing_time_max 3825000
mv88e6xxx_set_ageing_time: set ageing time to 10000
br0: failed (err=-34) to set attribute (id=6)
dsa_switch_ageing_time: ageing_time 15000, ageing_time_min 1000,
ageing_time_max 3825000
mv88e6xxx_set_ageing_time: set ageing time to 15000

The 15000 set corresponds to 150 seconds! (I hardcoded the dsa
ageing_time_min to 1000)
Vladimir Oltean July 6, 2022, 1:28 p.m. UTC | #8
On Tue, Jul 05, 2022 at 05:05:52PM +0200, Hans S wrote:
> Hi, does anybody know what it going on with this variable?
> struct dsa_port *dp ->ageing_time;
> 
> I experience that it changes value like a factor ~10 at times.

Could you be a bit more specific? Are you talking about STP Topology
Change Notification BPDUs, which trigger this code path?

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 7d27b2e6038f..9b25bc2dcb3e 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -671,10 +671,10 @@ void __br_set_topology_change(struct net_bridge *br, unsigned char val)
 
 		if (val) {
 			t = 2 * br->forward_delay;
-			br_debug(br, "decreasing ageing time to %lu\n", t);
+			br_info(br, "decreasing ageing time to %lu\n", t);
 		} else {
 			t = br->bridge_ageing_time;
-			br_debug(br, "restoring ageing time to %lu\n", t);
+			br_info(br, "restoring ageing time to %lu\n", t);
 		}
 
 		err = __set_ageing_time(br->dev, t);

Coincidentally the default values of 2 * br->forward_delay and br->bridge_ageing_time
are 1 order of magnitude apart from each other.

[  139.998310] br0: topology change detected, propagating
[  140.003490] br0: decreasing ageing time to 3000
[  175.193054] br0: restoring ageing time to 30000

What's the problem anyway?
Hans S July 6, 2022, 1:48 p.m. UTC | #9
On Wed, Jul 6, 2022 at 3:28 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Tue, Jul 05, 2022 at 05:05:52PM +0200, Hans S wrote:
> > Hi, does anybody know what it going on with this variable?
> > struct dsa_port *dp ->ageing_time;
> >
> > I experience that it changes value like a factor ~10 at times.
>
> Could you be a bit more specific? Are you talking about STP Topology
> Change Notification BPDUs, which trigger this code path?
>
> diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
> index 7d27b2e6038f..9b25bc2dcb3e 100644
> --- a/net/bridge/br_stp.c
> +++ b/net/bridge/br_stp.c
> @@ -671,10 +671,10 @@ void __br_set_topology_change(struct net_bridge *br, unsigned char val)
>
>                 if (val) {
>                         t = 2 * br->forward_delay;
> -                       br_debug(br, "decreasing ageing time to %lu\n", t);
> +                       br_info(br, "decreasing ageing time to %lu\n", t);
>                 } else {
>                         t = br->bridge_ageing_time;
> -                       br_debug(br, "restoring ageing time to %lu\n", t);
> +                       br_info(br, "restoring ageing time to %lu\n", t);
>                 }
>
>                 err = __set_ageing_time(br->dev, t);
>
> Coincidentally the default values of 2 * br->forward_delay and br->bridge_ageing_time
> are 1 order of magnitude apart from each other.
>
> [  139.998310] br0: topology change detected, propagating
> [  140.003490] br0: decreasing ageing time to 3000
> [  175.193054] br0: restoring ageing time to 30000
>
> What's the problem anyway?

It might be a topology change as you indicate, though I am not sure.
So I am not using that variable any more for determining the ageing
time for the locked FDB entries, but instead I have made a function to
read the time from the chip instead.

The problem with that, I have mentioned in my latest reply to the
mac-auth patch set...
Hans S July 6, 2022, 2:23 p.m. UTC | #10
> >> @@ -919,6 +920,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
> >>       if (err)
> >>               dev_err(chip->dev,
> >>                       "p%d: failed to force MAC link down\n", port);
> >> +     else
> >> +             if (mv88e6xxx_port_is_locked(chip, port, true))
> >> +                     mv88e6xxx_atu_locked_entry_flush(ds, port);
> >
> >This is superfluous, is it not? The bridge will transition a port whose
> >link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
> >fast-age the dynamic FDB entries on the port, which you've already
> >handled below.
>
> I removed this code, but then on link down the locked entries were not
> cleared out. Something not as thought?

I don't see a fast ageing happening on link down. There is the two cases:
1. Soft link down

With iproute2 command the link is brought down and
mv88e6xxx_mac_link_down() is called with rtnl lock taken.

2. Hard link down

I remove the cable from the port and mv88e6xxx_mac_link_down() is
called without rtnl lock.

As the hard link down case calls without rtnl lock, either I trigger
the case you have mentioned or I have to use rtnl_is_locked()
somewhere along the line?
Vladimir Oltean July 6, 2022, 2:33 p.m. UTC | #11
On Wed, Jul 06, 2022 at 12:12:01PM +0200, Hans S wrote:
> On Wed, Jul 6, 2022 at 10:56 AM Vladimir Oltean <olteanv@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 02:26:43PM +0200, Hans S wrote:
> > > > Dumb question: if you only flush the locked entries at fast age if the
> > > > port is locked, then what happens with the existing locked entries if
> > > > the port becomes unlocked before an FDB flush takes place?
> > > > Shouldn't mv88e6xxx_port_set_lock() call mv88e6xxx_atu_locked_entry_flush()
> > > > too?
> 
> BTW:
> >> @@ -919,6 +920,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
> >>       if (err)
> >>               dev_err(chip->dev,
> >>                       "p%d: failed to force MAC link down\n", port);
> >> +     else
> >> +             if (mv88e6xxx_port_is_locked(chip, port, true))
> >> +                     mv88e6xxx_atu_locked_entry_flush(ds, port);
> >
> >This is superfluous, is it not? The bridge will transition a port whose
> >link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
> >fast-age the dynamic FDB entries on the port, which you've already
> >handled below.
> 
> I removed this code, but then on link down the locked entries were not
> cleared out. Something not as thought?

What was the port's STP state before the link down event, and did it
change after the link down?

The relevant code in DSA is:

int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
{
	struct dsa_switch *ds = dp->ds;
	int port = dp->index;

	if (!ds->ops->port_stp_state_set)
		return -EOPNOTSUPP;

	ds->ops->port_stp_state_set(ds, port, state);

	if (!dsa_port_can_configure_learning(dp) ||
	    (do_fast_age && dp->learning)) {
		/* Fast age FDB entries or flush appropriate forwarding database
		 * for the given port, if we are moving it from Learning or
		 * Forwarding state, to Disabled or Blocking or Listening state.
		 * Ports that were standalone before the STP state change don't
		 * need to fast age the FDB, since address learning is off in
		 * standalone mode.
		 */

		if ((dp->stp_state == BR_STATE_LEARNING ||
		     dp->stp_state == BR_STATE_FORWARDING) &&
		    (state == BR_STATE_DISABLED ||
		     state == BR_STATE_BLOCKING ||
		     state == BR_STATE_LISTENING))
			dsa_port_fast_age(dp);
	}

	dp->stp_state = state;

	return 0;
}

If the STP state wasn't LEARNING or FORWARDING, there weren't supposed
to be dynamic FDB entries on the port in the first place, so DSA says
there's nothing to flush, and doesn't call dsa_port_fast_age().
Are there dynamic FDB entries being installed on a port that isn't
in a state that's supposed to learn? I guess the answer is yes.
Is that what you want, or should the locked entries be recorded only in
the LEARNING or FORWARDING states, otherwise discarded?

> > > That was my first thought too, but the way the flags are handled with the mask etc, does so that
> > > mv88e6xxx_port_set_lock() is called when other flags change. It could be done by the transition
> > > from locked->unlocked by checking if the port is locked already.
> >
> > Why does mv88e6xxx_port_set_lock() get called when other flags change?
> 
> That is what seems to happen, but maybe I am wrong. Looking at the dsa
> functions dsa_port_inherit_brport_flags() and
> dsa_port_clear_brport_flags(), they set the mask for which underlying
> function is called, and it looks to me that they call once for all the
> flags: BR_LEARNING, BR_FLOOD, BR_MCAST_FLOOD, BR_BCAST_FLOOD,
> BR_PORT_LOCKED?

What you actually want to say is: "mv88e6xxx_port_set_lock() is also
called when the DSA port joins a bridge, due to the switchdev attribute
replay logic present in dsa_port_switchdev_sync_attrs()".

Which, by the way, is logic that you've added yourself, in commit
b9e8b58fd2cb ("net: dsa: Include BR_PORT_LOCKED in the list of synced
brport flags") ;)

You are free to return early from mv88e6xxx_port_set_lock() if nothing has
changed. The DSA layer doesn't keep track of the locked state of the
port so it cannot deduce whether propagating to the switch driver is
necessary or not.

> > > > From the discussion with Ido and Nikolay I get the impression that
> > > > you're not doing the right thing here either, notifying a
> > > > SWITCHDEV_FDB_DEL_TO_BRIDGE from what is effectively the
> > > > SWITCHDEV_FDB_DEL_TO_DEVICE handler (port_fdb_del).
> > >
> > > Hmm, my experience tells me that much is opposite the normal
> > > conventions when dealing with
> > > locked ports, as there was never switchdev notifications from the
> > > driver to the bridge before, but
> > > that is needed to keep ATU and FDB entries in sync.
> >
> > On delete you mean? So the bridge signals switchdev a deletion of a
> > locked FDB entry (as I pointed out, this function gets indirectly called
> > from port_fdb_del), but it won't get deleted until switchdev signals it
> > back, is what you're saying?
> >
> 
> When added they are added with bridge FDB flags: extern_learn offload
> locked, with a SWITCHDEV_FDB_ADD_TO_BRIDGE event. So they are owned by
> the driver.
> When the driver deletes the locked entry the bridge FDB entry is
> removes by the SWITCHDEV_FDB_DEL_TO_BRIDGE event from the driver. That
> seems quite fair?

I'm just pointing out that you left other (probably unintended) code
paths for which the SWITCHDEV_FDB_DEL_TO_BRIDGE notifier is quite
useless. I haven't yet looked at your newest revision to see what
changed there.

> > > > Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
> > > > Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().
> > >
> > > Just a small optimization as I also have another case of the switch
> > > (only one switch case if
> > > you didn't notice) belonging to the next patch set regarding dynamic
> > > ATU entries.
> >
> > What kind of optimization are you even talking about? Please get rid of
> > coding patterns like this, sorry.
> >
> Right!

Right what? I'm genuinely curious what optimization are you talking about.

> > > > > +
> > > > > +     if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
> > > > > +             goto out;
> > > >
> > > > It would be good to actually propagate the error to the caller and
> > > > "locked" via a pass-by-reference bool pointer argument, not just say
> > > > that I/O errors mean that the port is unlocked.
> > >
> > > Again the wish to be able to call it from if statement checks,.
> > >
> > > > > +     reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
> > > > > +     if (locked) {
> > > > > +             reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> > > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> > > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> > > > > +                     MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
> > > >
> > > > I'd suggest aligning these macros vertically.
> > >
> > > They are according to the Linux kernel coding standard wrt indentation afaik.
> >
> > Compare:
> >
> >                 reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> >                         MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> >                         MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> >                         MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
> >
> > with:
> >
> >                 reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
> >                        MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
> >                        MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
> >                        MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
> 
> I cannot see the difference here...?

Just out of curiosity, are you even trying, are you looking at the
difference using a monospace font?

> Another issue...
> 
> I have removed the timers as they are superfluous and now just use the
> worker and jiffies. But I have found that the whole ageing time seems
> to be broken on the 5.17 kernel I am running. I don't know if it has
> been fixed, but the ageing timeout is supposed to be given in seconds.
> Here is the output from various functions after the command "ip link
> set dev br0 type bridge ageing_time 1500" (that is nominally 1500
> seconds according to man page!):
> 
> dsa_switch_ageing_time: ageing_time 10000, ageing_time_min 1000,
> ageing_time_max 3825000
> mv88e6xxx_set_ageing_time: set ageing time to 10000
> br0: failed (err=-34) to set attribute (id=6)
> dsa_switch_ageing_time: ageing_time 15000, ageing_time_min 1000,
> ageing_time_max 3825000
> mv88e6xxx_set_ageing_time: set ageing time to 15000
> 
> The 15000 set corresponds to 150 seconds! (I hardcoded the dsa
> ageing_time_min to 1000)

Are you talking about this known problem, that the ageing time values in
seconds need to be scaled up by a factor of USER_HZ when passed to the
kernel?
https://www.spinics.net/lists/netdev/msg672070.html
https://www.spinics.net/lists/netdev/msg567332.html
Hans S July 6, 2022, 3:38 p.m. UTC | #12
On Wed, Jul 6, 2022 at 4:33 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> > >> @@ -919,6 +920,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
> > >>       if (err)
> > >>               dev_err(chip->dev,
> > >>                       "p%d: failed to force MAC link down\n", port);
> > >> +     else
> > >> +             if (mv88e6xxx_port_is_locked(chip, port, true))
> > >> +                     mv88e6xxx_atu_locked_entry_flush(ds, port);
> > >
> > >This is superfluous, is it not? The bridge will transition a port whose
> > >link goes down to BR_STATE_DISABLED, which will make dsa_port_set_state()
> > >fast-age the dynamic FDB entries on the port, which you've already
> > >handled below.
> >
> > I removed this code, but then on link down the locked entries were not
> > cleared out. Something not as thought?
>
> What was the port's STP state before the link down event, and did it
> change after the link down?

The stp state is FORWARDING.

>
> If the STP state wasn't LEARNING or FORWARDING, there weren't supposed
> to be dynamic FDB entries on the port in the first place, so DSA says
> there's nothing to flush, and doesn't call dsa_port_fast_age().
> Are there dynamic FDB entries being installed on a port that isn't
> in a state that's supposed to learn? I guess the answer is yes.
> Is that what you want, or should the locked entries be recorded only in
> the LEARNING or FORWARDING states, otherwise discarded?
>
Learning is off as has been discussed, and I do want the locked
entries to be dynamic in the sense that the driver removes them after
the system ageing time has passed.

>
> What you actually want to say is: "mv88e6xxx_port_set_lock() is also
> called when the DSA port joins a bridge, due to the switchdev attribute
> replay logic present in dsa_port_switchdev_sync_attrs()".
>
> Which, by the way, is logic that you've added yourself, in commit
> b9e8b58fd2cb ("net: dsa: Include BR_PORT_LOCKED in the list of synced
> brport flags") ;)
>
> You are free to return early from mv88e6xxx_port_set_lock() if nothing has
> changed. The DSA layer doesn't keep track of the locked state of the
> port so it cannot deduce whether propagating to the switch driver is
> necessary or not.
>

I think I can safely call mv88e6xxx_atu_locked_entry_flush() from
mv88e6xxx_port_set_lock() when locked is off as the port setup for the
respective port must have been completed successfully.

> > When added they are added with bridge FDB flags: extern_learn offload
> > locked, with a SWITCHDEV_FDB_ADD_TO_BRIDGE event. So they are owned by
> > the driver.
> > When the driver deletes the locked entry the bridge FDB entry is
> > removes by the SWITCHDEV_FDB_DEL_TO_BRIDGE event from the driver. That
> > seems quite fair?
>
> I'm just pointing out that you left other (probably unintended) code
> paths for which the SWITCHDEV_FDB_DEL_TO_BRIDGE notifier is quite
> useless. I haven't yet looked at your newest revision to see what
> changed there.
>

I guess I should add a boolean to tell if
mv88e6xxx_atu_locked_entry_purge() should send a notification or not.
So that port_fdb_del() will not cause a SWITCHDEV_FDB_DEL_TO_BRIDGE
event.

> > > > > Why is the rtnl_unlock() outside the switch statement but the rtnl_lock() inside?
> > > > > Not to mention, the dsa_port_to_bridge_port() call needs to be under rtnl_lock().
> > > >
> > > > Just a small optimization as I also have another case of the switch
> > > > (only one switch case if
> > > > you didn't notice) belonging to the next patch set regarding dynamic
> > > > ATU entries.
> > >
> > > What kind of optimization are you even talking about? Please get rid of
> > > coding patterns like this, sorry.
> > >
> > Right!
>
> Right what? I'm genuinely curious what optimization are you talking about.
>

I am just confirming that what you wrote is correct, e.g. the
"Right!". So I have fixed that. :-)

>
> Just out of curiosity, are you even trying, are you looking at the
> difference using a monospace font?
>
> > Another issue...
> >
> > I have removed the timers as they are superfluous and now just use the
> > worker and jiffies. But I have found that the whole ageing time seems
> > to be broken on the 5.17 kernel I am running. I don't know if it has
> > been fixed, but the ageing timeout is supposed to be given in seconds.
> > Here is the output from various functions after the command "ip link
> > set dev br0 type bridge ageing_time 1500" (that is nominally 1500
> > seconds according to man page!):
> >
> > dsa_switch_ageing_time: ageing_time 10000, ageing_time_min 1000,
> > ageing_time_max 3825000
> > mv88e6xxx_set_ageing_time: set ageing time to 10000
> > br0: failed (err=-34) to set attribute (id=6)
> > dsa_switch_ageing_time: ageing_time 15000, ageing_time_min 1000,
> > ageing_time_max 3825000
> > mv88e6xxx_set_ageing_time: set ageing time to 15000
> >
> > The 15000 set corresponds to 150 seconds! (I hardcoded the dsa
> > ageing_time_min to 1000)
>
> Are you talking about this known problem, that the ageing time values in
> seconds need to be scaled up by a factor of USER_HZ when passed to the
> kernel?
> https://www.spinics.net/lists/netdev/msg672070.html
> https://www.spinics.net/lists/netdev/msg567332.html

It might be so, but there is another factor 10 which might be
regarding topology change as I understand. If I want a ageing timeout
of say 15 or 30 seconds, that hardly seems possible?
Hans S July 7, 2022, 6:54 a.m. UTC | #13
Hi Vladimir,

BTW, I have sent the patch to read the FID as you requested. You
should have received it yesterday (6th July) at around 12:25 UTC.

Hans
diff mbox series

Patch

diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index c8eca2b6f959..3ca57709730d 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -15,3 +15,4 @@  mv88e6xxx-objs += port_hidden.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
 mv88e6xxx-objs += serdes.o
 mv88e6xxx-objs += smi.o
+mv88e6xxx-objs += mv88e6xxx_switchdev.o
\ No newline at end of file
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 5d2c57a7c708..f7a16886bee9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -42,6 +42,7 @@ 
 #include "ptp.h"
 #include "serdes.h"
 #include "smi.h"
+#include "mv88e6xxx_switchdev.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -919,6 +920,9 @@  static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
 	if (err)
 		dev_err(chip->dev,
 			"p%d: failed to force MAC link down\n", port);
+	else
+		if (mv88e6xxx_port_is_locked(chip, port, true))
+			mv88e6xxx_atu_locked_entry_flush(ds, port);
 }
 
 static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
@@ -1685,6 +1689,9 @@  static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (mv88e6xxx_port_is_locked(chip, port, true))
+		mv88e6xxx_atu_locked_entry_flush(ds, port);
+
 	mv88e6xxx_reg_lock(chip);
 	err = mv88e6xxx_port_fast_age_fid(chip, port, 0);
 	mv88e6xxx_reg_unlock(chip);
@@ -1721,11 +1728,11 @@  static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
 	return err;
 }
 
-static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
-			      int (*cb)(struct mv88e6xxx_chip *chip,
-					const struct mv88e6xxx_vtu_entry *entry,
-					void *priv),
-			      void *priv)
+int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
+		       int (*cb)(struct mv88e6xxx_chip *chip,
+				 const struct mv88e6xxx_vtu_entry *entry,
+				 void *priv),
+		       void *priv)
 {
 	struct mv88e6xxx_vtu_entry entry = {
 		.vid = mv88e6xxx_max_vid(chip),
@@ -2722,9 +2729,12 @@  static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (mv88e6xxx_port_is_locked(chip, port, true))
+		mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
+
 	mv88e6xxx_reg_lock(chip);
 	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
-					   MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
+			MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
 	mv88e6xxx_reg_unlock(chip);
 
 	return err;
@@ -2735,12 +2745,17 @@  static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
 				  struct dsa_db db)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	bool locked_found = false;
 	int err;
 
-	mv88e6xxx_reg_lock(chip);
-	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0);
-	mv88e6xxx_reg_unlock(chip);
+	if (mv88e6xxx_port_is_locked(chip, port, true))
+		locked_found = mv88e6xxx_atu_locked_entry_find_purge(ds, port, addr, vid);
 
+	if (!locked_found) {
+		mv88e6xxx_reg_lock(chip);
+		err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0);
+		mv88e6xxx_reg_unlock(chip);
+	}
 	return err;
 }
 
@@ -3809,11 +3824,16 @@  static int mv88e6xxx_setup(struct dsa_switch *ds)
 
 static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port)
 {
-	return mv88e6xxx_setup_devlink_regions_port(ds, port);
+	int err;
+
+	err = mv88e6xxx_setup_devlink_regions_port(ds, port);
+	mv88e6xxx_init_violation_handler(ds, port);
+	return err;
 }
 
 static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port)
 {
+	mv88e6xxx_teardown_violation_handler(ds, port);
 	mv88e6xxx_teardown_devlink_regions_port(ds, port);
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 5e03cfe50156..c9a8404a6293 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -803,6 +803,11 @@  static inline void mv88e6xxx_reg_unlock(struct mv88e6xxx_chip *chip)
 	mutex_unlock(&chip->reg_lock);
 }
 
+int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
+		       int (*cb)(struct mv88e6xxx_chip *chip,
+				 const struct mv88e6xxx_vtu_entry *entry,
+				 void *priv),
+		       void *priv);
 int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *bitmap);
 
 #endif /* _MV88E6XXX_CHIP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 65958b2a0d3a..503fbf216670 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -136,6 +136,7 @@ 
 #define MV88E6XXX_G1_ATU_DATA_TRUNK				0x8000
 #define MV88E6XXX_G1_ATU_DATA_TRUNK_ID_MASK			0x00f0
 #define MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_MASK			0x3ff0
+#define MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_NO_EGRESS		0x0000
 #define MV88E6XXX_G1_ATU_DATA_STATE_MASK			0x000f
 #define MV88E6XXX_G1_ATU_DATA_STATE_UC_UNUSED			0x0000
 #define MV88E6XXX_G1_ATU_DATA_STATE_UC_AGE_1_OLDEST		0x0001
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index 40bd67a5c8e9..517376271f64 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -12,6 +12,8 @@ 
 
 #include "chip.h"
 #include "global1.h"
+#include "port.h"
+#include "mv88e6xxx_switchdev.h"
 
 /* Offset 0x01: ATU FID Register */
 
@@ -114,6 +116,18 @@  static int mv88e6xxx_g1_atu_op_wait(struct mv88e6xxx_chip *chip)
 	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_ATU_OP, bit, 0);
 }
 
+static int mv88e6xxx_g1_read_atu_violation(struct mv88e6xxx_chip *chip)
+{
+	int err;
+
+	err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_ATU_OP,
+				 MV88E6XXX_G1_ATU_OP_BUSY | MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g1_atu_op_wait(chip);
+}
+
 static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
 {
 	u16 val;
@@ -356,11 +370,11 @@  static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 	int spid;
 	int err;
 	u16 val;
+	u16 fid;
 
 	mv88e6xxx_reg_lock(chip);
 
-	err = mv88e6xxx_g1_atu_op(chip, 0,
-				  MV88E6XXX_G1_ATU_OP_GET_CLR_VIOLATION);
+	err = mv88e6xxx_g1_read_atu_violation(chip);
 	if (err)
 		goto out;
 
@@ -368,6 +382,10 @@  static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 	if (err)
 		goto out;
 
+	err = mv88e6xxx_g1_read(chip, MV88E6352_G1_ATU_FID, &fid);
+	if (err)
+		goto out;
+
 	err = mv88e6xxx_g1_atu_data_read(chip, &entry);
 	if (err)
 		goto out;
@@ -382,6 +400,11 @@  static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 		dev_err_ratelimited(chip->dev,
 				    "ATU age out violation for %pM\n",
 				    entry.mac);
+		err = mv88e6xxx_handle_violation(chip,
+						 chip->ports[spid].port,
+						 &entry,
+						 fid,
+						 MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION);
 	}
 
 	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {
@@ -396,6 +419,14 @@  static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 				    "ATU miss violation for %pM portvec %x spid %d\n",
 				    entry.mac, entry.portvec, spid);
 		chip->ports[spid].atu_miss_violation++;
+		if (mv88e6xxx_port_is_locked(chip, chip->ports[spid].port, false))
+			err = mv88e6xxx_handle_violation(chip,
+							 chip->ports[spid].port,
+							 &entry,
+							 fid,
+							 MV88E6XXX_G1_ATU_OP_MISS_VIOLATION);
+		if (err)
+			goto out;
 	}
 
 	if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) {
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
new file mode 100644
index 000000000000..8436655ceb9a
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.c
@@ -0,0 +1,249 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * mv88e6xxx_switchdev.c
+ *
+ *	Authors:
+ *	Hans J. Schultz		<hans.schultz@westermo.com>
+ *
+ */
+
+#include <net/switchdev.h>
+#include <linux/list.h>
+#include "chip.h"
+#include "global1.h"
+#include "mv88e6xxx_switchdev.h"
+
+static void mv88e6xxx_atu_locked_entry_timer_work(struct atu_locked_entry *ale)
+{
+	struct switchdev_notifier_fdb_info info = {
+		.addr = ale->mac,
+		.vid = ale->vid,
+		.added_by_user = false,
+		.is_local = false,
+		.offloaded = true,
+		.locked = true,
+	};
+	struct mv88e6xxx_atu_entry entry;
+	struct net_device *brport;
+	struct dsa_port *dp;
+
+	entry.state = MV88E6XXX_G1_ATU_DATA_STATE_UC_UNUSED;
+	entry.trunk = false;
+	memcpy(&entry.mac, &ale->mac, ETH_ALEN);
+
+	mv88e6xxx_reg_lock(ale->chip);
+	mv88e6xxx_g1_atu_loadpurge(ale->chip, ale->fid, &entry);
+	mv88e6xxx_reg_unlock(ale->chip);
+
+	dp = dsa_to_port(ale->chip->ds, ale->port);
+	brport = dsa_port_to_bridge_port(dp);
+
+	if (brport) {
+		if (!rtnl_is_locked()) {
+			rtnl_lock();
+			call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+						 brport, &info.info, NULL);
+			rtnl_unlock();
+		} else {
+			call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+						 brport, &info.info, NULL);
+		}
+	} else {
+		dev_err(ale->chip->dev, "ERR: No bridge port for dsa port belonging to port %d\n",
+			ale->port);
+	}
+}
+
+static inline void mv88e6xxx_atu_locked_entry_purge(struct atu_locked_entry *ale)
+{
+	mv88e6xxx_atu_locked_entry_timer_work(ale);
+	del_timer(&ale->timer);
+	list_del(&ale->list);
+	kfree(ale);
+}
+
+static void mv88e6xxx_atu_locked_entry_cleanup(struct work_struct *work)
+{
+	struct dsa_port *dp = container_of(work, struct dsa_port, atu_work.work);
+	struct atu_locked_entry *ale, *tmp;
+
+	mutex_lock(&dp->locked_entries_list_lock);
+	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
+		if (ale->timed_out) {
+			mv88e6xxx_atu_locked_entry_purge(ale);
+			atomic_dec(&dp->atu_locked_entry_cnt);
+		}
+	}
+	mutex_unlock(&dp->locked_entries_list_lock);
+
+	mod_delayed_work(system_long_wq, &dp->atu_work, msecs_to_jiffies(100));
+}
+
+static void mv88e6xxx_atu_locked_entry_timer_handler(struct timer_list *t)
+{
+	struct atu_locked_entry *ale = from_timer(ale, t, timer);
+
+	if (ale)
+		ale->timed_out = true;
+}
+
+struct mv88e6xxx_fid_search_ctx {
+	u16 fid_search;
+	u16 vid_found;
+};
+
+static int mv88e6xxx_find_vid_on_matching_fid(struct mv88e6xxx_chip *chip,
+					      const struct mv88e6xxx_vtu_entry *entry,
+					      void *priv)
+{
+	struct mv88e6xxx_fid_search_ctx *ctx = priv;
+
+	if (ctx->fid_search == entry->fid) {
+		ctx->vid_found = entry->vid;
+		return 1;
+	}
+
+	return 0;
+}
+
+int mv88e6xxx_handle_violation(struct mv88e6xxx_chip *chip,
+			       int port,
+			       struct mv88e6xxx_atu_entry *entry,
+			       u16 fid,
+			       u16 type)
+{
+	struct switchdev_notifier_fdb_info info = {
+		.addr = entry->mac,
+		.vid = 0,
+		.added_by_user = false,
+		.is_local = false,
+		.offloaded = true,
+		.locked = true,
+	};
+	struct atu_locked_entry *locked_entry;
+	struct mv88e6xxx_fid_search_ctx ctx;
+	struct net_device *brport;
+	struct dsa_port *dp;
+	int err;
+
+	ctx.fid_search = fid;
+	err = mv88e6xxx_vtu_walk(chip, mv88e6xxx_find_vid_on_matching_fid, &ctx);
+	if (err < 0)
+		return err;
+	if (err == 1)
+		info.vid = ctx.vid_found;
+	else
+		return -ENODATA;
+
+	dp = dsa_to_port(chip->ds, port);
+	brport = dsa_port_to_bridge_port(dp);
+
+	if (!brport)
+		return -ENODEV;
+
+	switch (type) {
+	case MV88E6XXX_G1_ATU_OP_MISS_VIOLATION:
+		if (atomic_read(&dp->atu_locked_entry_cnt) >= ATU_LOCKED_ENTRIES_MAX) {
+			mv88e6xxx_reg_unlock(chip);
+			return 0;
+		}
+		entry->portvec = MV88E6XXX_G1_ATU_DATA_PORT_VECTOR_NO_EGRESS;
+		entry->state = MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC;
+		entry->trunk = false;
+		err = mv88e6xxx_g1_atu_loadpurge(chip, fid, entry);
+		if (err)
+			goto fail;
+
+		locked_entry = kmalloc(sizeof(*locked_entry), GFP_ATOMIC);
+		if (!locked_entry)
+			return -ENOMEM;
+		timer_setup(&locked_entry->timer, mv88e6xxx_atu_locked_entry_timer_handler, 0);
+		locked_entry->timer.expires = jiffies + dp->ageing_time / 10;
+		locked_entry->chip = chip;
+		locked_entry->port = port;
+		locked_entry->fid = fid;
+		locked_entry->vid = info.vid;
+		locked_entry->timed_out = false;
+		memcpy(&locked_entry->mac, entry->mac, ETH_ALEN);
+
+		mutex_lock(&dp->locked_entries_list_lock);
+		add_timer(&locked_entry->timer);
+		list_add(&locked_entry->list, &dp->atu_locked_entries_list);
+		mutex_unlock(&dp->locked_entries_list_lock);
+		atomic_inc(&dp->atu_locked_entry_cnt);
+
+		mv88e6xxx_reg_unlock(chip);
+
+		rtnl_lock();
+		err = call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
+					       brport, &info.info, NULL);
+		break;
+	}
+	rtnl_unlock();
+
+	return err;
+
+fail:
+	mv88e6xxx_reg_unlock(chip);
+	return err;
+}
+
+bool mv88e6xxx_atu_locked_entry_find_purge(struct dsa_switch *ds, int port,
+					   const unsigned char *addr, u16 vid)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct atu_locked_entry *ale, *tmp;
+	bool found = false;
+
+	mutex_lock(&dp->locked_entries_list_lock);
+	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
+		if (!memcmp(&ale->mac, addr, ETH_ALEN)) {
+			if (ale->vid == vid) {
+				mv88e6xxx_atu_locked_entry_purge(ale);
+				atomic_dec(&dp->atu_locked_entry_cnt);
+				found = true;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&dp->locked_entries_list_lock);
+	return found;
+}
+
+void mv88e6xxx_atu_locked_entry_flush(struct dsa_switch *ds, int port)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct atu_locked_entry *ale, *tmp;
+
+	mutex_lock(&dp->locked_entries_list_lock);
+	list_for_each_entry_safe(ale, tmp, &dp->atu_locked_entries_list, list) {
+		mv88e6xxx_atu_locked_entry_purge(ale);
+		atomic_dec(&dp->atu_locked_entry_cnt);
+	}
+	mutex_unlock(&dp->locked_entries_list_lock);
+
+	if (atomic_read(&dp->atu_locked_entry_cnt) != 0)
+		dev_err(ds->dev,
+			"ERROR: Locked entries count is not zero after flush on port %d\n",
+			port);
+}
+
+void mv88e6xxx_init_violation_handler(struct dsa_switch *ds, int port)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+
+	INIT_LIST_HEAD(&dp->atu_locked_entries_list);
+	mutex_init(&dp->locked_entries_list_lock);
+	dp->atu_locked_entry_cnt.counter = 0;
+	INIT_DELAYED_WORK(&dp->atu_work, mv88e6xxx_atu_locked_entry_cleanup);
+	mod_delayed_work(system_long_wq, &dp->atu_work, msecs_to_jiffies(100));
+}
+
+void mv88e6xxx_teardown_violation_handler(struct dsa_switch *ds, int port)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+
+	cancel_delayed_work(&dp->atu_work);
+	mv88e6xxx_atu_locked_entry_flush(ds, port);
+	mutex_destroy(&dp->locked_entries_list_lock);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h
new file mode 100644
index 000000000000..f0e7abf7c361
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx_switchdev.h
@@ -0,0 +1,40 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * mv88e6xxx_switchdev.h
+ *
+ *	Authors:
+ *	Hans J. Schultz		<hans.schultz@westermo.com>
+ *
+ */
+
+#ifndef DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_
+#define DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_
+
+#include <net/switchdev.h>
+#include "chip.h"
+
+#define ATU_LOCKED_ENTRIES_MAX	64
+
+struct atu_locked_entry {
+	struct list_head list;
+	struct mv88e6xxx_chip *chip;
+	int port;
+	u8	mac[ETH_ALEN];
+	u16 fid;
+	u16 vid;
+	struct timer_list timer;
+	bool timed_out;
+};
+
+int mv88e6xxx_handle_violation(struct mv88e6xxx_chip *chip,
+			       int port,
+			       struct mv88e6xxx_atu_entry *entry,
+			       u16 fid,
+			       u16 type);
+bool mv88e6xxx_atu_locked_entry_find_purge(struct dsa_switch *ds, int port,
+					   const unsigned char *addr, u16 vid);
+void mv88e6xxx_atu_locked_entry_flush(struct dsa_switch *ds, int port);
+void mv88e6xxx_init_violation_handler(struct dsa_switch *ds, int port);
+void mv88e6xxx_teardown_violation_handler(struct dsa_switch *ds, int port);
+
+#endif /* DRIVERS_NET_DSA_MV88E6XXX_MV88E6XXX_SWITCHDEV_H_ */
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 795b3128768f..c4e5e7174129 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -14,9 +14,11 @@ 
 #include <linux/phylink.h>
 
 #include "chip.h"
+#include "global1.h"
 #include "global2.h"
 #include "port.h"
 #include "serdes.h"
+#include "mv88e6xxx_switchdev.h"
 
 int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 			u16 *val)
@@ -1239,6 +1241,25 @@  int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port,
 	return err;
 }
 
+bool mv88e6xxx_port_is_locked(struct mv88e6xxx_chip *chip, int port, bool chiplock)
+{
+	bool locked = false;
+	u16 reg;
+
+	if (chiplock)
+		mv88e6xxx_reg_lock(chip);
+
+	if (mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL0, &reg))
+		goto out;
+	locked = reg & MV88E6XXX_PORT_CTL0_SA_FILT_DROP_ON_LOCK;
+
+out:
+	if (chiplock)
+		mv88e6xxx_reg_unlock(chip);
+
+	return locked;
+}
+
 int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
 			    bool locked)
 {
@@ -1261,10 +1282,13 @@  int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
 	if (err)
 		return err;
 
-	reg &= ~MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT;
-	if (locked)
-		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT;
-
+	reg &= MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK;
+	if (locked) {
+		reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG |
+			MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT |
+			MV88E6XXX_PORT_ASSOC_VECTOR_INT_AGE_OUT |
+			MV88E6XXX_PORT_ASSOC_VECTOR_HOLD_AT_1;
+	}
 	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR, reg);
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index e0a705d82019..d377abd6ab17 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -231,6 +231,7 @@ 
 #define MV88E6XXX_PORT_ASSOC_VECTOR_LOCKED_PORT		0x2000
 #define MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG	0x1000
 #define MV88E6XXX_PORT_ASSOC_VECTOR_REFRESH_LOCKED	0x0800
+#define MV88E6XXX_PORT_ASSOC_VECTOR_PAV_MASK		0x07ff
 
 /* Offset 0x0C: Port ATU Control */
 #define MV88E6XXX_PORT_ATU_CTL		0x0c
@@ -374,6 +375,7 @@  int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid);
 int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid);
 int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid);
 
+bool mv88e6xxx_port_is_locked(struct mv88e6xxx_chip *chip, int port, bool chiplock);
 int mv88e6xxx_port_set_lock(struct mv88e6xxx_chip *chip, int port,
 			    bool locked);