diff mbox series

[net,1/3] net: bonding: Unsync device addresses on ndo_stop

Message ID 20220831025836.207070-2-bpoirier@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Unsync addresses from ports when stopping aggregated devices | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 69 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Benjamin Poirier Aug. 31, 2022, 2:58 a.m. UTC
Netdev drivers are expected to call dev_{uc,mc}_sync() in their
ndo_set_rx_mode method and dev_{uc,mc}_unsync() in their ndo_stop method.
This is mentioned in the kerneldoc for those dev_* functions.

The bonding driver calls dev_{uc,mc}_unsync() during ndo_uninit instead of
ndo_stop. This is ineffective because address lists (dev->{uc,mc}) have
already been emptied in unregister_netdevice_many() before ndo_uninit is
called. This mistake can result in addresses being leftover on former bond
slaves after a bond has been deleted; see test_LAG_cleanup() in the last
patch in this series.

Add unsync calls, via bond_hw_addr_flush(), at their expected location,
bond_close().
Add dev_mc_add() call to bond_open() to match the above change.
The existing call __bond_release_one->bond_hw_addr_flush is left in place
because there are other call chains that lead to __bond_release_one(), not
just ndo_uninit.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
---
 drivers/net/bonding/bond_main.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

Comments

Jay Vosburgh Aug. 31, 2022, 3:25 a.m. UTC | #1
Benjamin Poirier <bpoirier@nvidia.com> wrote:

>Netdev drivers are expected to call dev_{uc,mc}_sync() in their
>ndo_set_rx_mode method and dev_{uc,mc}_unsync() in their ndo_stop method.
>This is mentioned in the kerneldoc for those dev_* functions.
>
>The bonding driver calls dev_{uc,mc}_unsync() during ndo_uninit instead of
>ndo_stop. This is ineffective because address lists (dev->{uc,mc}) have
>already been emptied in unregister_netdevice_many() before ndo_uninit is
>called. This mistake can result in addresses being leftover on former bond
>slaves after a bond has been deleted; see test_LAG_cleanup() in the last
>patch in this series.
>
>Add unsync calls, via bond_hw_addr_flush(), at their expected location,
>bond_close().
>Add dev_mc_add() call to bond_open() to match the above change.
>The existing call __bond_release_one->bond_hw_addr_flush is left in place
>because there are other call chains that lead to __bond_release_one(), not
>just ndo_uninit.
>
>Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

	I'm just going from memory here, so I'm probably wrong, but
didn't the sync/unsync stuff for HW addresses happen several years after
the git transition?

>Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
>---
> drivers/net/bonding/bond_main.c | 31 +++++++++++++++++++++----------
> 1 file changed, 21 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 2f4da2c13c0a..5784fbe03552 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -254,6 +254,8 @@ static const struct flow_dissector_key flow_keys_bonding_keys[] = {
> 
> static struct flow_dissector flow_keys_bonding __read_mostly;
> 
>+static const u8 lacpdu_multicast[] = MULTICAST_LACPDU_ADDR;
>+
> /*-------------------------- Forward declarations ---------------------------*/
> 
> static int bond_init(struct net_device *bond_dev);
>@@ -865,12 +867,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,
> 	dev_uc_unsync(slave_dev, bond_dev);
> 	dev_mc_unsync(slave_dev, bond_dev);
> 
>-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>-		/* del lacpdu mc addr from mc list */
>-		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
>-
>+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
> 		dev_mc_del(slave_dev, lacpdu_multicast);
>-	}
> }
> 
> /*--------------------------- Active slave change ---------------------------*/
>@@ -2171,12 +2169,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> 		dev_uc_sync_multiple(slave_dev, bond_dev);
> 		netif_addr_unlock_bh(bond_dev);
> 
>-		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>-			/* add lacpdu mc addr to mc list */
>-			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
>-
>+		if (BOND_MODE(bond) == BOND_MODE_8023AD)
> 			dev_mc_add(slave_dev, lacpdu_multicast);
>-		}
> 	}

	Just to make sure I'm clear, the above changes regarding
lacpdu_multicast have no functional impact, correct?  They appear to
move lacpdu_multicast to global scope for use in the change just below.

> 
> 	bond->slave_cnt++;
>@@ -4211,6 +4205,9 @@ static int bond_open(struct net_device *bond_dev)
> 		/* register to receive LACPDUs */
> 		bond->recv_probe = bond_3ad_lacpdu_recv;
> 		bond_3ad_initiate_agg_selection(bond, 1);
>+
>+		bond_for_each_slave(bond, slave, iter)
>+			dev_mc_add(slave->dev, lacpdu_multicast);
> 	}

	After this patch, am I understanding correctly that both
bond_enslave and bond_open will call dev_mc_add for lacpdu_multicast?
Since __dev_mc_add calls __hw_addr_add_ex with sync=false and
exclusive=false, doesn't that allow us to end up with two references?

	-J

> 	if (bond_mode_can_use_xmit_hash(bond))
>@@ -4222,6 +4219,7 @@ static int bond_open(struct net_device *bond_dev)
> static int bond_close(struct net_device *bond_dev)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>+	struct slave *slave;
> 
> 	bond_work_cancel_all(bond);
> 	bond->send_peer_notif = 0;
>@@ -4229,6 +4227,19 @@ static int bond_close(struct net_device *bond_dev)
> 		bond_alb_deinitialize(bond);
> 	bond->recv_probe = NULL;
> 
>+	if (bond_uses_primary(bond)) {
>+		rcu_read_lock();
>+		slave = rcu_dereference(bond->curr_active_slave);
>+		if (slave)
>+			bond_hw_addr_flush(bond_dev, slave->dev);
>+		rcu_read_unlock();
>+	} else {
>+		struct list_head *iter;
>+
>+		bond_for_each_slave(bond, slave, iter)
>+			bond_hw_addr_flush(bond_dev, slave->dev);
>+	}
>+
> 	return 0;
> }
> 
>-- 
>2.36.1
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com
Benjamin Poirier Aug. 31, 2022, 4:36 a.m. UTC | #2
On 2022-08-30 20:25 -0700, Jay Vosburgh wrote:
> Benjamin Poirier <bpoirier@nvidia.com> wrote:
> 
> >Netdev drivers are expected to call dev_{uc,mc}_sync() in their
> >ndo_set_rx_mode method and dev_{uc,mc}_unsync() in their ndo_stop method.
> >This is mentioned in the kerneldoc for those dev_* functions.
> >
> >The bonding driver calls dev_{uc,mc}_unsync() during ndo_uninit instead of
> >ndo_stop. This is ineffective because address lists (dev->{uc,mc}) have
> >already been emptied in unregister_netdevice_many() before ndo_uninit is
> >called. This mistake can result in addresses being leftover on former bond
> >slaves after a bond has been deleted; see test_LAG_cleanup() in the last
> >patch in this series.
> >
> >Add unsync calls, via bond_hw_addr_flush(), at their expected location,
> >bond_close().
> >Add dev_mc_add() call to bond_open() to match the above change.
> >The existing call __bond_release_one->bond_hw_addr_flush is left in place
> >because there are other call chains that lead to __bond_release_one(), not
> >just ndo_uninit.
> >
> >Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> 
> 	I'm just going from memory here, so I'm probably wrong, but
> didn't the sync/unsync stuff for HW addresses happen several years after
> the git transition?

Yes, you're right. The bonding driver was converted to dev addr list
functions in commit 303d1cbf610e ("bonding: Convert hw addr handling to
sync/unsync, support ucast addresses", v3.11-rc1). However, the problem
fixed by this patch ("addresses being leftover on former bond slaves
after a bond has been deleted") was present before the conversion (at
least for mc, uc was not handled at all before the conversion). Since
the problem was not introduced by 303d1cbf610e, that's why I chose
1da177e4c3f4 for the Fixes tag. Does that make sense?
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2f4da2c13c0a..5784fbe03552 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -254,6 +254,8 @@  static const struct flow_dissector_key flow_keys_bonding_keys[] = {
 
 static struct flow_dissector flow_keys_bonding __read_mostly;
 
+static const u8 lacpdu_multicast[] = MULTICAST_LACPDU_ADDR;
+
 /*-------------------------- Forward declarations ---------------------------*/
 
 static int bond_init(struct net_device *bond_dev);
@@ -865,12 +867,8 @@  static void bond_hw_addr_flush(struct net_device *bond_dev,
 	dev_uc_unsync(slave_dev, bond_dev);
 	dev_mc_unsync(slave_dev, bond_dev);
 
-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		/* del lacpdu mc addr from mc list */
-		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		dev_mc_del(slave_dev, lacpdu_multicast);
-	}
 }
 
 /*--------------------------- Active slave change ---------------------------*/
@@ -2171,12 +2169,8 @@  int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		dev_uc_sync_multiple(slave_dev, bond_dev);
 		netif_addr_unlock_bh(bond_dev);
 
-		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-			/* add lacpdu mc addr to mc list */
-			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
+		if (BOND_MODE(bond) == BOND_MODE_8023AD)
 			dev_mc_add(slave_dev, lacpdu_multicast);
-		}
 	}
 
 	bond->slave_cnt++;
@@ -4211,6 +4205,9 @@  static int bond_open(struct net_device *bond_dev)
 		/* register to receive LACPDUs */
 		bond->recv_probe = bond_3ad_lacpdu_recv;
 		bond_3ad_initiate_agg_selection(bond, 1);
+
+		bond_for_each_slave(bond, slave, iter)
+			dev_mc_add(slave->dev, lacpdu_multicast);
 	}
 
 	if (bond_mode_can_use_xmit_hash(bond))
@@ -4222,6 +4219,7 @@  static int bond_open(struct net_device *bond_dev)
 static int bond_close(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
 
 	bond_work_cancel_all(bond);
 	bond->send_peer_notif = 0;
@@ -4229,6 +4227,19 @@  static int bond_close(struct net_device *bond_dev)
 		bond_alb_deinitialize(bond);
 	bond->recv_probe = NULL;
 
+	if (bond_uses_primary(bond)) {
+		rcu_read_lock();
+		slave = rcu_dereference(bond->curr_active_slave);
+		if (slave)
+			bond_hw_addr_flush(bond_dev, slave->dev);
+		rcu_read_unlock();
+	} else {
+		struct list_head *iter;
+
+		bond_for_each_slave(bond, slave, iter)
+			bond_hw_addr_flush(bond_dev, slave->dev);
+	}
+
 	return 0;
 }