diff mbox series

[RFC,v2,net-next,15/17] net: dsa: replay port and local fdb entries when joining the bridge

Message ID 20210224114350.2791260-16-olteanv@gmail.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series RX filtering in DSA | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 4 maintainers not CCed: bridge@lists.linux-foundation.org davem@davemloft.net kuba@kernel.org ivecera@redhat.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 172 this patch: 172
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 113 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 198 this patch: 198
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Vladimir Oltean Feb. 24, 2021, 11:43 a.m. UTC
From: Vladimir Oltean <vladimir.oltean@nxp.com>

When a DSA port joins a LAG that already had an FDB entry pointing to it:

ip link set bond0 master br0
bridge fdb add dev bond0 00:01:02:03:04:05 master static
ip link set swp0 master bond0

the DSA port will have no idea that this FDB entry is there, because it
missed the switchdev event emitted at its creation.

Ido Schimmel pointed this out during a discussion about challenges with
switchdev offloading of stacked interfaces between the physical port and
the bridge, and recommended to just catch that condition and deny the
CHANGEUPPER event:
https://lore.kernel.org/netdev/20210210105949.GB287766@shredder.lan/

But in fact, we might need to deal with the hard thing anyway, which is
to replay all FDB addresses relevant to this port, because it isn't just
static FDB entries, but also local addresses (ones that are not
forwarded but terminated by the bridge). There, we can't just say 'oh
yeah, there was an upper already so I'm not joining that'.

So, similar to the logic for replaying MDB entries, add a function that
must be called by individual switchdev drivers and replays local FDB
entries as well as ones pointing towards a bridge port. This time, we
use the atomic switchdev notifier block, since FDB events are delivered
on the atomic switchdev notifier chain rather than the blocking one.

Reported-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 include/linux/if_bridge.h | 10 ++++++++
 include/net/switchdev.h   |  1 +
 net/bridge/br_fdb.c       | 53 +++++++++++++++++++++++++++++++++++++++
 net/dsa/slave.c           |  7 +++++-
 4 files changed, 70 insertions(+), 1 deletion(-)

Comments

Tobias Waldekranz Feb. 26, 2021, 12:23 p.m. UTC | #1
On Wed, Feb 24, 2021 at 13:43, Vladimir Oltean <olteanv@gmail.com> wrote:
> From: Vladimir Oltean <vladimir.oltean@nxp.com>
>
> When a DSA port joins a LAG that already had an FDB entry pointing to it:
>
> ip link set bond0 master br0
> bridge fdb add dev bond0 00:01:02:03:04:05 master static
> ip link set swp0 master bond0
>
> the DSA port will have no idea that this FDB entry is there, because it
> missed the switchdev event emitted at its creation.
>
> Ido Schimmel pointed this out during a discussion about challenges with
> switchdev offloading of stacked interfaces between the physical port and
> the bridge, and recommended to just catch that condition and deny the
> CHANGEUPPER event:
> https://lore.kernel.org/netdev/20210210105949.GB287766@shredder.lan/
>
> But in fact, we might need to deal with the hard thing anyway, which is
> to replay all FDB addresses relevant to this port, because it isn't just
> static FDB entries, but also local addresses (ones that are not
> forwarded but terminated by the bridge). There, we can't just say 'oh
> yeah, there was an upper already so I'm not joining that'.
>
> So, similar to the logic for replaying MDB entries, add a function that
> must be called by individual switchdev drivers and replays local FDB
> entries as well as ones pointing towards a bridge port. This time, we
> use the atomic switchdev notifier block, since that's what FDB entries
> expect for some reason.
>
> Reported-by: Ido Schimmel <idosch@idosch.org>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> ---
>  include/linux/if_bridge.h | 10 ++++++++
>  include/net/switchdev.h   |  1 +
>  net/bridge/br_fdb.c       | 53 +++++++++++++++++++++++++++++++++++++++
>  net/dsa/slave.c           |  7 +++++-
>  4 files changed, 70 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index 2f0e5713bf39..2a90ac638b06 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -144,6 +144,8 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
>  				    __u16 vid);
>  void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
>  bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +		  struct notifier_block *nb);
>  #else
>  static inline struct net_device *
>  br_fdb_find_port(const struct net_device *br_dev,
> @@ -162,6 +164,14 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
>  {
>  	return false;
>  }
> +
> +static inline int br_fdb_replay(struct net_device *br_dev,
> +				struct net_device *dev,
> +				struct notifier_block *nb)
> +{
> +	return -EINVAL;
> +}
> +
>  #endif
>  
>  #endif
> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index f1a5a9a3634d..5b63dfd444c6 100644
> --- a/include/net/switchdev.h
> +++ b/include/net/switchdev.h
> @@ -206,6 +206,7 @@ struct switchdev_notifier_info {
>  
>  struct switchdev_notifier_fdb_info {
>  	struct switchdev_notifier_info info; /* must be first */
> +	struct list_head list;
>  	const unsigned char *addr;
>  	u16 vid;
>  	u8 added_by_user:1,
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index 1d54ae0f58fb..9eb776503b02 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -726,6 +726,59 @@ static inline size_t fdb_nlmsg_size(void)
>  		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
>  }
>  
> +static int br_fdb_replay_one(struct notifier_block *nb,
> +			     struct net_bridge_fdb_entry *fdb,
> +			     struct net_device *dev)
> +{
> +	struct switchdev_notifier_fdb_info item;
> +	int err;
> +
> +	item.addr = fdb->key.addr.addr;
> +	item.vid = fdb->key.vlan_id;
> +	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
> +	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
> +	item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
> +	item.info.dev = dev;
> +
> +	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
> +	return notifier_to_errno(err);
> +}
> +
> +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
> +		  struct notifier_block *nb)
> +{
> +	struct net_bridge_fdb_entry *fdb;
> +	struct net_bridge *br;
> +	int err = 0;
> +
> +	if (!netif_is_bridge_master(br_dev))
> +		return -EINVAL;
> +
> +	if (!netif_is_bridge_port(dev))
> +		return -EINVAL;
> +
> +	br = netdev_priv(br_dev);
> +
> +	rcu_read_lock();
> +
> +	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
> +		struct net_device *dst_dev;
> +
> +		dst_dev = fdb->dst ? fdb->dst->dev : br->dev;
> +		if (dst_dev != br_dev && dst_dev != dev)
> +			continue;
> +
> +		err = br_fdb_replay_one(nb, fdb, dst_dev);
> +		if (err)
> +			break;
> +	}
> +
> +	rcu_read_unlock();
> +
> +	return err;
> +}
> +EXPORT_SYMBOL(br_fdb_replay);
> +
>  static void fdb_notify(struct net_bridge *br,
>  		       const struct net_bridge_fdb_entry *fdb, int type,
>  		       bool swdev_notify)
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index 10b4a0f72dcb..5fa5737e622c 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -2290,7 +2290,8 @@ bool dsa_slave_dev_check(const struct net_device *dev)
>  }
>  EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
>  
> -/* Circular reference */
> +/* Circular references */
> +static struct notifier_block dsa_slave_switchdev_notifier;
>  static struct notifier_block dsa_slave_switchdev_blocking_notifier;
>  
>  static int dsa_slave_changeupper(struct net_device *dev,
> @@ -2306,6 +2307,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
>  			err = dsa_port_bridge_join(dp, bridge_dev);
>  			if (!err) {
>  				dsa_bridge_mtu_normalization(dp);
> +				br_fdb_replay(bridge_dev, dev,
> +					      &dsa_slave_switchdev_notifier);
>  				br_mdb_replay(bridge_dev, dev,
>  					      &dsa_slave_switchdev_blocking_notifier);

If VLAN filtering is enabled, we would also have to replay that. Port
attributes also, right?

I like the pull model, because it saves the bridge from doing lots of
dumpster diving. However, should there be a single `bridge_replay` that
takes care of everything?

Rather than this kit-car approach which outsources ordering etc to each
switchdev driver, you issue a single call saying: "bring me up to
speed". It seems right that that knowledge should reside in the bridge
since it was the one who sent the original events that are being
replayed.

>  			}
> @@ -2370,6 +2373,8 @@ dsa_slave_lag_changeupper(struct net_device *dev,
>  	}
>  
>  	if (netif_is_bridge_master(info->upper_dev) && !err) {
> +		br_fdb_replay(info->upper_dev, dev,
> +			      &dsa_slave_switchdev_notifier);
>  		br_mdb_replay(info->upper_dev, dev,
>  			      &dsa_slave_switchdev_blocking_notifier);
>  	}
> -- 
> 2.25.1
Vladimir Oltean Feb. 26, 2021, 6:08 p.m. UTC | #2
On Fri, Feb 26, 2021 at 01:23:23PM +0100, Tobias Waldekranz wrote:
> If VLAN filtering is enabled, we would also have to replay that. Port
> attributes also, right?
> 
> I like the pull model, because it saves the bridge from doing lots of
> dumpster diving. However, should there be a single `bridge_replay` that
> takes care of everything?
> 
> Rather than this kit-car approach which outsources ordering etc to each
> switchdev driver, you issue a single call saying: "bring me up to
> speed". It seems right that that knowledge should reside in the bridge
> since it was the one who sent the original events that are being
> replayed.

Yes, in the non-RFC version I'm going to do that.
I'm also thinking I could just pass the blocking and atomic switchdev
notifiers as an argument to the switchdev_bridge_port_offload_notify()
call, such that the drivers need to do one thing and one thing only.

For the purposes of this RFC I just wanted to have something that works
for address filtering.
diff mbox series

Patch

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 2f0e5713bf39..2a90ac638b06 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -144,6 +144,8 @@  struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 				    __u16 vid);
 void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -162,6 +164,14 @@  br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
 {
 	return false;
 }
+
+static inline int br_fdb_replay(struct net_device *br_dev,
+				struct net_device *dev,
+				struct notifier_block *nb)
+{
+	return -EINVAL;
+}
+
 #endif
 
 #endif
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index f1a5a9a3634d..5b63dfd444c6 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -206,6 +206,7 @@  struct switchdev_notifier_info {
 
 struct switchdev_notifier_fdb_info {
 	struct switchdev_notifier_info info; /* must be first */
+	struct list_head list;
 	const unsigned char *addr;
 	u16 vid;
 	u8 added_by_user:1,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 1d54ae0f58fb..9eb776503b02 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -726,6 +726,59 @@  static inline size_t fdb_nlmsg_size(void)
 		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
 }
 
+static int br_fdb_replay_one(struct notifier_block *nb,
+			     struct net_bridge_fdb_entry *fdb,
+			     struct net_device *dev)
+{
+	struct switchdev_notifier_fdb_info item;
+	int err;
+
+	item.addr = fdb->key.addr.addr;
+	item.vid = fdb->key.vlan_id;
+	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+	item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
+	item.info.dev = dev;
+
+	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+	return notifier_to_errno(err);
+}
+
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb)
+{
+	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge *br;
+	int err = 0;
+
+	if (!netif_is_bridge_master(br_dev))
+		return -EINVAL;
+
+	if (!netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+		struct net_device *dst_dev;
+
+		dst_dev = fdb->dst ? fdb->dst->dev : br->dev;
+		if (dst_dev != br_dev && dst_dev != dev)
+			continue;
+
+		err = br_fdb_replay_one(nb, fdb, dst_dev);
+		if (err)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL(br_fdb_replay);
+
 static void fdb_notify(struct net_bridge *br,
 		       const struct net_bridge_fdb_entry *fdb, int type,
 		       bool swdev_notify)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 10b4a0f72dcb..5fa5737e622c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2290,7 +2290,8 @@  bool dsa_slave_dev_check(const struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
 
-/* Circular reference */
+/* Circular references */
+static struct notifier_block dsa_slave_switchdev_notifier;
 static struct notifier_block dsa_slave_switchdev_blocking_notifier;
 
 static int dsa_slave_changeupper(struct net_device *dev,
@@ -2306,6 +2307,8 @@  static int dsa_slave_changeupper(struct net_device *dev,
 			err = dsa_port_bridge_join(dp, bridge_dev);
 			if (!err) {
 				dsa_bridge_mtu_normalization(dp);
+				br_fdb_replay(bridge_dev, dev,
+					      &dsa_slave_switchdev_notifier);
 				br_mdb_replay(bridge_dev, dev,
 					      &dsa_slave_switchdev_blocking_notifier);
 			}
@@ -2370,6 +2373,8 @@  dsa_slave_lag_changeupper(struct net_device *dev,
 	}
 
 	if (netif_is_bridge_master(info->upper_dev) && !err) {
+		br_fdb_replay(info->upper_dev, dev,
+			      &dsa_slave_switchdev_notifier);
 		br_mdb_replay(info->upper_dev, dev,
 			      &dsa_slave_switchdev_blocking_notifier);
 	}