diff mbox series

[net,v2,1/3] net: bonding: Unsync device addresses on ndo_stop

Message ID 20220902014516.184930-2-bpoirier@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Unsync addresses from ports when stopping aggregated devices | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 69 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Benjamin Poirier Sept. 2, 2022, 1:45 a.m. UTC
Netdev drivers are expected to call dev_{uc,mc}_sync() in their
ndo_set_rx_mode method and dev_{uc,mc}_unsync() in their ndo_stop method.
This is mentioned in the kerneldoc for those dev_* functions.

The bonding driver calls dev_{uc,mc}_unsync() during ndo_uninit instead of
ndo_stop. This is ineffective because address lists (dev->{uc,mc}) have
already been emptied in unregister_netdevice_many() before ndo_uninit is
called. This mistake can result in addresses being leftover on former bond
slaves after a bond has been deleted; see test_LAG_cleanup() in the last
patch in this series.

Add unsync calls, via bond_hw_addr_flush(), at their expected location,
bond_close().
Add dev_mc_add() call to bond_open() to match the above change.
The existing call __bond_release_one->bond_hw_addr_flush is left in place
because there are other call chains that lead to __bond_release_one(), not
just ndo_uninit.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
---
 drivers/net/bonding/bond_main.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

Comments

Jay Vosburgh Sept. 2, 2022, 6:28 p.m. UTC | #1
Benjamin Poirier <bpoirier@nvidia.com> wrote:

	Repeating a couple of questions that I suspect were missed the
first time around:

>Netdev drivers are expected to call dev_{uc,mc}_sync() in their
>ndo_set_rx_mode method and dev_{uc,mc}_unsync() in their ndo_stop method.
>This is mentioned in the kerneldoc for those dev_* functions.
>
>The bonding driver calls dev_{uc,mc}_unsync() during ndo_uninit instead of
>ndo_stop. This is ineffective because address lists (dev->{uc,mc}) have
>already been emptied in unregister_netdevice_many() before ndo_uninit is
>called. This mistake can result in addresses being leftover on former bond
>slaves after a bond has been deleted; see test_LAG_cleanup() in the last
>patch in this series.
>
>Add unsync calls, via bond_hw_addr_flush(), at their expected location,
>bond_close().
>Add dev_mc_add() call to bond_open() to match the above change.
>The existing call __bond_release_one->bond_hw_addr_flush is left in place
>because there are other call chains that lead to __bond_release_one(), not
>just ndo_uninit.
>
>Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
>---
> drivers/net/bonding/bond_main.c | 31 +++++++++++++++++++++----------
> 1 file changed, 21 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 2f4da2c13c0a..5784fbe03552 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -254,6 +254,8 @@ static const struct flow_dissector_key flow_keys_bonding_keys[] = {
> 
> static struct flow_dissector flow_keys_bonding __read_mostly;
> 
>+static const u8 lacpdu_multicast[] = MULTICAST_LACPDU_ADDR;
>+
> /*-------------------------- Forward declarations ---------------------------*/
> 
> static int bond_init(struct net_device *bond_dev);
>@@ -865,12 +867,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,
> 	dev_uc_unsync(slave_dev, bond_dev);
> 	dev_mc_unsync(slave_dev, bond_dev);
> 
>-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>-		/* del lacpdu mc addr from mc list */
>-		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
>-
>+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
> 		dev_mc_del(slave_dev, lacpdu_multicast);
>-	}
> }
> 
> /*--------------------------- Active slave change ---------------------------*/
>@@ -2171,12 +2169,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> 		dev_uc_sync_multiple(slave_dev, bond_dev);
> 		netif_addr_unlock_bh(bond_dev);
> 
>-		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>-			/* add lacpdu mc addr to mc list */
>-			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
>-
>+		if (BOND_MODE(bond) == BOND_MODE_8023AD)
> 			dev_mc_add(slave_dev, lacpdu_multicast);
>-		}
> 	}

	Just to make sure I'm clear (not missing something in the
churn), the above changes regarding lacpdu_multicast have no functional
impact, correct?  They appear to move lacpdu_multicast to global scope
for use in the change just below.

> 	bond->slave_cnt++;
>@@ -4211,6 +4205,9 @@ static int bond_open(struct net_device *bond_dev)
> 		/* register to receive LACPDUs */
> 		bond->recv_probe = bond_3ad_lacpdu_recv;
> 		bond_3ad_initiate_agg_selection(bond, 1);
>+
>+		bond_for_each_slave(bond, slave, iter)
>+			dev_mc_add(slave->dev, lacpdu_multicast);
> 	}

	After this change, am I understanding correctly that both
bond_enslave() and bond_open() will call dev_mc_add() for
lacpdu_multicast?  Since dev_mc_add() -> __dev_mc_add() calls
__hw_addr_add_ex() with sync=false and exclusive=false, could that allow
us to end up with two references for lacpdu_multicast?

	-J

> 
> 	if (bond_mode_can_use_xmit_hash(bond))
>@@ -4222,6 +4219,7 @@ static int bond_open(struct net_device *bond_dev)
> static int bond_close(struct net_device *bond_dev)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>+	struct slave *slave;
> 
> 	bond_work_cancel_all(bond);
> 	bond->send_peer_notif = 0;
>@@ -4229,6 +4227,19 @@ static int bond_close(struct net_device *bond_dev)
> 		bond_alb_deinitialize(bond);
> 	bond->recv_probe = NULL;
> 
>+	if (bond_uses_primary(bond)) {
>+		rcu_read_lock();
>+		slave = rcu_dereference(bond->curr_active_slave);
>+		if (slave)
>+			bond_hw_addr_flush(bond_dev, slave->dev);
>+		rcu_read_unlock();
>+	} else {
>+		struct list_head *iter;
>+
>+		bond_for_each_slave(bond, slave, iter)
>+			bond_hw_addr_flush(bond_dev, slave->dev);
>+	}
>+
> 	return 0;
> }
> 
>-- 
>2.37.2
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com
Benjamin Poirier Sept. 5, 2022, 9:19 a.m. UTC | #2
On 2022-09-02 11:28 -0700, Jay Vosburgh wrote:
> Benjamin Poirier <bpoirier@nvidia.com> wrote:
> 
> 	Repeating a couple of questions that I suspect were missed the
> first time around:

Thanks for repeating, I did miss the other questions, sorry.

[...]
> >@@ -2171,12 +2169,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> > 		dev_uc_sync_multiple(slave_dev, bond_dev);
> > 		netif_addr_unlock_bh(bond_dev);
> > 
> >-		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
> >-			/* add lacpdu mc addr to mc list */
> >-			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
> >-
> >+		if (BOND_MODE(bond) == BOND_MODE_8023AD)
> > 			dev_mc_add(slave_dev, lacpdu_multicast);
> >-		}
> > 	}
> 
> 	Just to make sure I'm clear (not missing something in the
> churn), the above changes regarding lacpdu_multicast have no functional
> impact, correct?  They appear to move lacpdu_multicast to global scope
> for use in the change just below.

Yes, that's right - no functional impact. I'll split that to a separate
patch to make it clearer.

> > 	bond->slave_cnt++;
> >@@ -4211,6 +4205,9 @@ static int bond_open(struct net_device *bond_dev)
> > 		/* register to receive LACPDUs */
> > 		bond->recv_probe = bond_3ad_lacpdu_recv;
> > 		bond_3ad_initiate_agg_selection(bond, 1);
> >+
> >+		bond_for_each_slave(bond, slave, iter)
> >+			dev_mc_add(slave->dev, lacpdu_multicast);
> > 	}
> 
> 	After this change, am I understanding correctly that both
> bond_enslave() and bond_open() will call dev_mc_add() for
> lacpdu_multicast?  Since dev_mc_add() -> __dev_mc_add() calls
> __hw_addr_add_ex() with sync=false and exclusive=false, could that allow
> us to end up with two references for lacpdu_multicast?

You are correct once again. When enslaving to an up bond (case in the
selftest), it is ok, but when enslaving to a down bond and then setting
it up, there is a double add.

Thanks for the review. I'll send a v3.
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2f4da2c13c0a..5784fbe03552 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -254,6 +254,8 @@  static const struct flow_dissector_key flow_keys_bonding_keys[] = {
 
 static struct flow_dissector flow_keys_bonding __read_mostly;
 
+static const u8 lacpdu_multicast[] = MULTICAST_LACPDU_ADDR;
+
 /*-------------------------- Forward declarations ---------------------------*/
 
 static int bond_init(struct net_device *bond_dev);
@@ -865,12 +867,8 @@  static void bond_hw_addr_flush(struct net_device *bond_dev,
 	dev_uc_unsync(slave_dev, bond_dev);
 	dev_mc_unsync(slave_dev, bond_dev);
 
-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		/* del lacpdu mc addr from mc list */
-		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		dev_mc_del(slave_dev, lacpdu_multicast);
-	}
 }
 
 /*--------------------------- Active slave change ---------------------------*/
@@ -2171,12 +2169,8 @@  int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		dev_uc_sync_multiple(slave_dev, bond_dev);
 		netif_addr_unlock_bh(bond_dev);
 
-		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-			/* add lacpdu mc addr to mc list */
-			u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
-
+		if (BOND_MODE(bond) == BOND_MODE_8023AD)
 			dev_mc_add(slave_dev, lacpdu_multicast);
-		}
 	}
 
 	bond->slave_cnt++;
@@ -4211,6 +4205,9 @@  static int bond_open(struct net_device *bond_dev)
 		/* register to receive LACPDUs */
 		bond->recv_probe = bond_3ad_lacpdu_recv;
 		bond_3ad_initiate_agg_selection(bond, 1);
+
+		bond_for_each_slave(bond, slave, iter)
+			dev_mc_add(slave->dev, lacpdu_multicast);
 	}
 
 	if (bond_mode_can_use_xmit_hash(bond))
@@ -4222,6 +4219,7 @@  static int bond_open(struct net_device *bond_dev)
 static int bond_close(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
 
 	bond_work_cancel_all(bond);
 	bond->send_peer_notif = 0;
@@ -4229,6 +4227,19 @@  static int bond_close(struct net_device *bond_dev)
 		bond_alb_deinitialize(bond);
 	bond->recv_probe = NULL;
 
+	if (bond_uses_primary(bond)) {
+		rcu_read_lock();
+		slave = rcu_dereference(bond->curr_active_slave);
+		if (slave)
+			bond_hw_addr_flush(bond_dev, slave->dev);
+		rcu_read_unlock();
+	} else {
+		struct list_head *iter;
+
+		bond_for_each_slave(bond, slave, iter)
+			bond_hw_addr_flush(bond_dev, slave->dev);
+	}
+
 	return 0;
 }