diff mbox series

[RFC,2/2] net: bonding: don't call ethtool methods under RCU

Message ID 20240718122017.d2e33aaac43a.I10ab9c9ded97163aef4e4de10985cd8f7de60d28@changeid (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [RFC,1/2] net: bonding: correctly annotate RCU in bond_should_notify_peers() | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 661 this patch: 661
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 4 maintainers not CCed: andy@greyhouse.net pabeni@redhat.com kuba@kernel.org j.vosburgh@gmail.com
netdev/build_clang success Errors and warnings before: 662 this patch: 662
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 662 this patch: 662
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 93 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0

Commit Message

Johannes Berg July 18, 2024, 7:20 p.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

Currently, bond_miimon_inspect() is called under RCU, but it
calls ethtool ops. Since my earlier commit facd15dfd691
("net: core: synchronize link-watch when carrier is queried")
this is no longer permitted in the general ethtool case, but
it was already not permitted for many drivers such as USB in
which it can sleep to do MDIO register accesses etc.

Therefore, it's better to simply not do this. Change bonding
to acquire the RTNL for the MII monitor work directly to call
the bond_miimon_inspect() function and thus ethtool ops.

Reported-by: syzbot+2120b9a8f96b3fa90bad@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/bonding/bond_main.c | 49 +++++++++++----------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

Comments

Jay Vosburgh July 18, 2024, 9:12 p.m. UTC | #1
Johannes Berg <johannes@sipsolutions.net> wrote:

>From: Johannes Berg <johannes.berg@intel.com>
>
>Currently, bond_miimon_inspect() is called under RCU, but it
>calls ethtool ops. Since my earlier commit facd15dfd691
>("net: core: synchronize link-watch when carrier is queried")
>this is no longer permitted in the general ethtool case, but
>it was already not permitted for many drivers such as USB in
>which it can sleep to do MDIO register accesses etc.
>
>Therefore, it's better to simply not do this. Change bonding
>to acquire the RTNL for the MII monitor work directly to call
>the bond_miimon_inspect() function and thus ethtool ops.

	We can't do this, as it will hit RTNL every monitor interval,
which can be many times per second.  The logic is structured to
specifically avoid acquiring RTNL during the inspection pass.

	The issue that szybot is seeing only happens if bonding's
use_carrier option is set to 0, which is not the normal case.
use_carrier is a backwards compatibility option from years ago for
drivers that do not implement netif_carrier_on/off (and thus calling
netif_carrier_ok() would be unreliable).

	This also came up in [0], and looking now I see there's a patch
that syzbot tested, although I haven't reviewed it.

	Another option is to for the Powers That Be to declare that it's
safe to assume that network drivers implement netif_carrier_on/off to
advertise their link state, in which case the use_carrier logic in
bonding can be removed.

	Or we can somehow isolate the "must acquire RTNL instead of RCU"
to the problematic use_carrier=0 path, but that's a nontrivial change.

	-J

[0] https://lore.kernel.org/lkml/000000000000eb54bf061cfd666a@google.com/

>Reported-by: syzbot+2120b9a8f96b3fa90bad@syzkaller.appspotmail.com
>Signed-off-by: Johannes Berg <johannes.berg@intel.com>
>---
> drivers/net/bonding/bond_main.c | 49 +++++++++++----------------------
> 1 file changed, 16 insertions(+), 33 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 2ed0da068490..6a635c23b00e 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2576,7 +2576,6 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
> 
> /*-------------------------------- Monitoring -------------------------------*/
> 
>-/* called with rcu_read_lock() */
> static int bond_miimon_inspect(struct bonding *bond)
> {
> 	bool ignore_updelay = false;
>@@ -2585,17 +2584,17 @@ static int bond_miimon_inspect(struct bonding *bond)
> 	struct slave *slave;
> 
> 	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
>-		ignore_updelay = !rcu_dereference(bond->curr_active_slave);
>+		ignore_updelay = !rcu_access_pointer(bond->curr_active_slave);
> 	} else {
> 		struct bond_up_slave *usable_slaves;
> 
>-		usable_slaves = rcu_dereference(bond->usable_slaves);
>+		usable_slaves = rtnl_dereference(bond->usable_slaves);
> 
> 		if (usable_slaves && usable_slaves->count == 0)
> 			ignore_updelay = true;
> 	}
> 
>-	bond_for_each_slave_rcu(bond, slave, iter) {
>+	bond_for_each_slave(bond, slave, iter) {
> 		bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
> 
> 		link_state = bond_check_dev_link(bond, slave->dev, 0);
>@@ -2807,8 +2806,7 @@ static void bond_mii_monitor(struct work_struct *work)
> {
> 	struct bonding *bond = container_of(work, struct bonding,
> 					    mii_work.work);
>-	bool should_notify_peers = false;
>-	bool commit;
>+	bool should_notify_peers;
> 	unsigned long delay;
> 	struct slave *slave;
> 	struct list_head *iter;
>@@ -2818,45 +2816,30 @@ static void bond_mii_monitor(struct work_struct *work)
> 	if (!bond_has_slaves(bond))
> 		goto re_arm;
> 
>-	rcu_read_lock();
>-	should_notify_peers = bond_should_notify_peers(bond);
>-	commit = !!bond_miimon_inspect(bond);
>-	if (bond->send_peer_notif) {
>-		rcu_read_unlock();
>-		if (rtnl_trylock()) {
>-			bond->send_peer_notif--;
>-			rtnl_unlock();
>-		}
>-	} else {
>-		rcu_read_unlock();
>+	/* deadlock avoidance with bond_close cancel of workqueue */
>+	if (!rtnl_trylock()) {
>+		delay = 1;
>+		goto re_arm;
> 	}
> 
>-	if (commit) {
>-		/* Race avoidance with bond_close cancel of workqueue */
>-		if (!rtnl_trylock()) {
>-			delay = 1;
>-			should_notify_peers = false;
>-			goto re_arm;
>-		}
>+	should_notify_peers = bond_should_notify_peers(bond);
> 
>+	if (bond_miimon_inspect(bond)) {
> 		bond_for_each_slave(bond, slave, iter) {
> 			bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
> 		}
> 		bond_miimon_commit(bond);
>-
>-		rtnl_unlock();	/* might sleep, hold no other locks */
> 	}
> 
>+	if (bond->send_peer_notif)
>+		bond->send_peer_notif--;
>+	if (should_notify_peers)
>+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
>+	rtnl_unlock();	/* might sleep, hold no other locks */
>+
> re_arm:
> 	if (bond->params.miimon)
> 		queue_delayed_work(bond->wq, &bond->mii_work, delay);
>-
>-	if (should_notify_peers) {
>-		if (!rtnl_trylock())
>-			return;
>-		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
>-		rtnl_unlock();
>-	}
> }
> 
> static int bond_upper_dev_walk(struct net_device *upper,
>-- 
>2.45.2
>
>

---
	-Jay Vosburgh, jv@jvosburgh.net
Jiri Pirko July 19, 2024, 9:50 a.m. UTC | #2
Thu, Jul 18, 2024 at 09:20:17PM CEST, johannes@sipsolutions.net wrote:
>From: Johannes Berg <johannes.berg@intel.com>
>
>Currently, bond_miimon_inspect() is called under RCU, but it
>calls ethtool ops. Since my earlier commit facd15dfd691
>("net: core: synchronize link-watch when carrier is queried")
>this is no longer permitted in the general ethtool case, but
>it was already not permitted for many drivers such as USB in
>which it can sleep to do MDIO register accesses etc.
>
>Therefore, it's better to simply not do this. Change bonding
>to acquire the RTNL for the MII monitor work directly to call
>the bond_miimon_inspect() function and thus ethtool ops.

Is there a good reason why to directly query device here using whatever?
I mean, why netif_oper_up() would not return the correct bool here?
Introduction of periodic rtnl locking for no good reason is not probably
something we should do :/
Johannes Berg July 19, 2024, 4:31 p.m. UTC | #3
On Fri, 2024-07-19 at 11:50 +0200, Jiri Pirko wrote:
> Thu, Jul 18, 2024 at 09:20:17PM CEST, johannes@sipsolutions.net wrote:
> > From: Johannes Berg <johannes.berg@intel.com>
> > 
> > Currently, bond_miimon_inspect() is called under RCU, but it
> > calls ethtool ops. Since my earlier commit facd15dfd691
> > ("net: core: synchronize link-watch when carrier is queried")
> > this is no longer permitted in the general ethtool case, but
> > it was already not permitted for many drivers such as USB in
> > which it can sleep to do MDIO register accesses etc.
> > 
> > Therefore, it's better to simply not do this. Change bonding
> > to acquire the RTNL for the MII monitor work directly to call
> > the bond_miimon_inspect() function and thus ethtool ops.
> 
> Is there a good reason why to directly query device here using whatever?

See Jay's email for that, I think?

> I mean, why netif_oper_up() would not return the correct bool here?
> Introduction of periodic rtnl locking for no good reason is not probably
> something we should do :/
> 

Yeah, fair.

johannes
Johannes Berg July 19, 2024, 4:37 p.m. UTC | #4
On Thu, 2024-07-18 at 14:12 -0700, Jay Vosburgh wrote:
> 
> 	We can't do this, as it will hit RTNL every monitor interval,
> which can be many times per second.

Fair.

>   The logic is structured to
> specifically avoid acquiring RTNL during the inspection pass.

We also cannot do _that_, however, it's just broken with devices that
want to sleep there. Arguably the common ethtool op that syzbot
complains about is just a distraction, because while that does sleep
(now), it's also equivalent to use_carrier==1, so we could just say "oh
if it's the common ethtool op then use the carrier directly", but while
that'd prevent syzbot from reporting the issue again, it'd not actually
fix the problem with all the USB drivers etc.

> 	The issue that szybot is seeing only happens if bonding's
> use_carrier option is set to 0, which is not the normal case.

Sure, but like I said above, syzbot doesn't really matter. Don't get too
hung up on it.

> use_carrier is a backwards compatibility option from years ago for
> drivers that do not implement netif_carrier_on/off (and thus calling
> netif_carrier_ok() would be unreliable).

Sure, OK.

> 	This also came up in [0], and looking now I see there's a patch
> that syzbot tested, although I haven't reviewed it.

+Hillf, I believe that patch is broken because it completely defeats the
purpose of my original patch there, and also addresses only the "syzbot
complains about common ethtool op" issue, not the more general problem.

> 	Another option is to for the Powers That Be to declare that it's
> safe to assume that network drivers implement netif_carrier_on/off to
> advertise their link state, in which case the use_carrier logic in
> bonding can be removed.

No objection to that, if you don't have proper carrier reporting then a
lot of other things will likely be broken anyway?

> 	Or we can somehow isolate the "must acquire RTNL instead of RCU"
> to the problematic use_carrier=0 path, but that's a nontrivial change.
> 

I guess.

johannes
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2ed0da068490..6a635c23b00e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2576,7 +2576,6 @@  static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
 
 /*-------------------------------- Monitoring -------------------------------*/
 
-/* called with rcu_read_lock() */
 static int bond_miimon_inspect(struct bonding *bond)
 {
 	bool ignore_updelay = false;
@@ -2585,17 +2584,17 @@  static int bond_miimon_inspect(struct bonding *bond)
 	struct slave *slave;
 
 	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
-		ignore_updelay = !rcu_dereference(bond->curr_active_slave);
+		ignore_updelay = !rcu_access_pointer(bond->curr_active_slave);
 	} else {
 		struct bond_up_slave *usable_slaves;
 
-		usable_slaves = rcu_dereference(bond->usable_slaves);
+		usable_slaves = rtnl_dereference(bond->usable_slaves);
 
 		if (usable_slaves && usable_slaves->count == 0)
 			ignore_updelay = true;
 	}
 
-	bond_for_each_slave_rcu(bond, slave, iter) {
+	bond_for_each_slave(bond, slave, iter) {
 		bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
 
 		link_state = bond_check_dev_link(bond, slave->dev, 0);
@@ -2807,8 +2806,7 @@  static void bond_mii_monitor(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
-	bool should_notify_peers = false;
-	bool commit;
+	bool should_notify_peers;
 	unsigned long delay;
 	struct slave *slave;
 	struct list_head *iter;
@@ -2818,45 +2816,30 @@  static void bond_mii_monitor(struct work_struct *work)
 	if (!bond_has_slaves(bond))
 		goto re_arm;
 
-	rcu_read_lock();
-	should_notify_peers = bond_should_notify_peers(bond);
-	commit = !!bond_miimon_inspect(bond);
-	if (bond->send_peer_notif) {
-		rcu_read_unlock();
-		if (rtnl_trylock()) {
-			bond->send_peer_notif--;
-			rtnl_unlock();
-		}
-	} else {
-		rcu_read_unlock();
+	/* deadlock avoidance with bond_close cancel of workqueue */
+	if (!rtnl_trylock()) {
+		delay = 1;
+		goto re_arm;
 	}
 
-	if (commit) {
-		/* Race avoidance with bond_close cancel of workqueue */
-		if (!rtnl_trylock()) {
-			delay = 1;
-			should_notify_peers = false;
-			goto re_arm;
-		}
+	should_notify_peers = bond_should_notify_peers(bond);
 
+	if (bond_miimon_inspect(bond)) {
 		bond_for_each_slave(bond, slave, iter) {
 			bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
 		}
 		bond_miimon_commit(bond);
-
-		rtnl_unlock();	/* might sleep, hold no other locks */
 	}
 
+	if (bond->send_peer_notif)
+		bond->send_peer_notif--;
+	if (should_notify_peers)
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
+	rtnl_unlock();	/* might sleep, hold no other locks */
+
 re_arm:
 	if (bond->params.miimon)
 		queue_delayed_work(bond->wq, &bond->mii_work, delay);
-
-	if (should_notify_peers) {
-		if (!rtnl_trylock())
-			return;
-		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
-		rtnl_unlock();
-	}
 }
 
 static int bond_upper_dev_walk(struct net_device *upper,