diff mbox series

[net-next,1/2] rtnetlink: move rtnl_lock handling out of af_netlink

Message ID 20240606192906.1941189-2-kuba@kernel.org (mailing list archive)
State Accepted
Commit 5380d64f8d766576ac5c0f627418b2d0e1d2641f
Delegated to: Netdev Maintainers
Headers show
Series rtnetlink: move rtnl_lock handling out of af_netlink | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 869 this patch: 869
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 868 this patch: 868
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 873 this patch: 873
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 42 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-06-07--09-00 (tests: 1041)

Commit Message

Jakub Kicinski June 6, 2024, 7:29 p.m. UTC
Now that we have an intermediate layer of code for handling
rtnl-level netlink dump quirks, we can move the rtnl_lock
taking there.

For dump handlers with RTNL_FLAG_DUMP_SPLIT_NLM_DONE we can
avoid taking rtnl_lock just to generate NLM_DONE, once again.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/rtnetlink.c     | 9 +++++++--
 net/netlink/af_netlink.c | 2 --
 2 files changed, 7 insertions(+), 4 deletions(-)

Comments

Kuniyuki Iwashima June 6, 2024, 11:33 p.m. UTC | #1
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu,  6 Jun 2024 12:29:05 -0700
> Now that we have an intermediate layer of code for handling
> rtnl-level netlink dump quirks, we can move the rtnl_lock
> taking there.
> 
> For dump handlers with RTNL_FLAG_DUMP_SPLIT_NLM_DONE we can
> avoid taking rtnl_lock just to generate NLM_DONE, once again.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  net/core/rtnetlink.c     | 9 +++++++--
>  net/netlink/af_netlink.c | 2 --
>  2 files changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 4668d6718040..eabfc8290f5e 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -6486,6 +6486,7 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
>  
>  static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
>  {
> +	const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED);
>  	rtnl_dumpit_func dumpit = cb->data;
>  	int err;
>  
> @@ -6495,7 +6496,11 @@ static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
>  	if (!dumpit)
>  		return 0;
>  
> +	if (needs_lock)
> +		rtnl_lock();
>  	err = dumpit(skb, cb);
> +	if (needs_lock)
> +		rtnl_unlock();

This calls netdev_run_todo() now, is this change intended ?

Other parts look good to me.


>  
>  	/* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
>  	 * Some applications which parse netlink manually depend on this.
> @@ -6515,7 +6520,8 @@ static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
>  				const struct nlmsghdr *nlh,
>  				struct netlink_dump_control *control)
>  {
> -	if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
> +	if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE ||
> +	    !(control->flags & RTNL_FLAG_DUMP_UNLOCKED)) {
>  		WARN_ON(control->data);
>  		control->data = control->dump;
>  		control->dump = rtnl_dumpit;
> @@ -6703,7 +6709,6 @@ static int __net_init rtnetlink_net_init(struct net *net)
>  	struct netlink_kernel_cfg cfg = {
>  		.groups		= RTNLGRP_MAX,
>  		.input		= rtnetlink_rcv,
> -		.cb_mutex	= &rtnl_mutex,
>  		.flags		= NL_CFG_F_NONROOT_RECV,
>  		.bind		= rtnetlink_bind,
>  	};
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index fa9c090cf629..8bbbe75e75db 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -2330,8 +2330,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>  
>  		cb->extack = &extack;
>  
> -		if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
> -			extra_mutex = NULL;
>  		if (extra_mutex)
>  			mutex_lock(extra_mutex);
>  		nlk->dump_done_errno = cb->dump(skb, cb);
> -- 
> 2.45.2
Jakub Kicinski June 7, 2024, 12:04 a.m. UTC | #2
On Thu, 6 Jun 2024 16:33:03 -0700 Kuniyuki Iwashima wrote:
> > +	if (needs_lock)
> > +		rtnl_lock();
> >  	err = dumpit(skb, cb);
> > +	if (needs_lock)
> > +		rtnl_unlock();  
> 
> This calls netdev_run_todo() now, is this change intended ?

Nice catch / careful thinking, indeed we're moving from pure unlock to
run_todo. I don't really recall if I thought of this when writing the
change (it was few days back). My guess is that the fact we weren't
calling full rtnl_unlock() was unintentional / out of laziness in the
first place. It didn't matter since dumps are unlikely to changes /
unregister / free things. But still, someone may get caught off guard
as some point that we're holding rtnl but won't go via the usual unlock
path.

Would you like me to add a note to the commit message?
Kuniyuki Iwashima June 7, 2024, 12:18 a.m. UTC | #3
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Jun 2024 17:04:53 -0700
> On Thu, 6 Jun 2024 16:33:03 -0700 Kuniyuki Iwashima wrote:
> > > +	if (needs_lock)
> > > +		rtnl_lock();
> > >  	err = dumpit(skb, cb);
> > > +	if (needs_lock)
> > > +		rtnl_unlock();  
> > 
> > This calls netdev_run_todo() now, is this change intended ?
> 
> Nice catch / careful thinking, indeed we're moving from pure unlock to
> run_todo. I don't really recall if I thought of this when writing the
> change (it was few days back). My guess is that the fact we weren't
> calling full rtnl_unlock() was unintentional / out of laziness in the
> first place. It didn't matter since dumps are unlikely to changes /
> unregister / free things.

This makes sense.  Probably due to cb_mutex interface constraint.


> But still, someone may get caught off guard
> as some point that we're holding rtnl but won't go via the usual unlock
> path.
> 
> Would you like me to add a note to the commit message?

That would be nice, but I'm fine with this version :)

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>

Thanks!
Eric Dumazet June 7, 2024, 9:24 a.m. UTC | #4
On Thu, Jun 6, 2024 at 9:29 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Now that we have an intermediate layer of code for handling
> rtnl-level netlink dump quirks, we can move the rtnl_lock
> taking there.
>
> For dump handlers with RTNL_FLAG_DUMP_SPLIT_NLM_DONE we can
> avoid taking rtnl_lock just to generate NLM_DONE, once again.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Reviewed-by: Eric Dumazet <edumazet@google.com>
diff mbox series

Patch

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4668d6718040..eabfc8290f5e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6486,6 +6486,7 @@  static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED);
 	rtnl_dumpit_func dumpit = cb->data;
 	int err;
 
@@ -6495,7 +6496,11 @@  static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 	if (!dumpit)
 		return 0;
 
+	if (needs_lock)
+		rtnl_lock();
 	err = dumpit(skb, cb);
+	if (needs_lock)
+		rtnl_unlock();
 
 	/* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
 	 * Some applications which parse netlink manually depend on this.
@@ -6515,7 +6520,8 @@  static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 				const struct nlmsghdr *nlh,
 				struct netlink_dump_control *control)
 {
-	if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+	if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE ||
+	    !(control->flags & RTNL_FLAG_DUMP_UNLOCKED)) {
 		WARN_ON(control->data);
 		control->data = control->dump;
 		control->dump = rtnl_dumpit;
@@ -6703,7 +6709,6 @@  static int __net_init rtnetlink_net_init(struct net *net)
 	struct netlink_kernel_cfg cfg = {
 		.groups		= RTNLGRP_MAX,
 		.input		= rtnetlink_rcv,
-		.cb_mutex	= &rtnl_mutex,
 		.flags		= NL_CFG_F_NONROOT_RECV,
 		.bind		= rtnetlink_bind,
 	};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fa9c090cf629..8bbbe75e75db 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2330,8 +2330,6 @@  static int netlink_dump(struct sock *sk, bool lock_taken)
 
 		cb->extack = &extack;
 
-		if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED)
-			extra_mutex = NULL;
 		if (extra_mutex)
 			mutex_lock(extra_mutex);
 		nlk->dump_done_errno = cb->dump(skb, cb);