diff mbox series

[v1,net-next,13/13] rtnetlink: Protect struct rtnl_af_ops with SRCU.

Message ID 20241009231656.57830-14-kuniyu@amazon.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series rtnetlink: Refactor rtnl_{new,del,set}link() for per-netns RTNL. | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 19 this patch: 19
netdev/build_tools success Errors and warnings before: 0 (+2) this patch: 0 (+2)
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 47 this patch: 47
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 2887 this patch: 2889
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 120 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0

Commit Message

Kuniyuki Iwashima Oct. 9, 2024, 11:16 p.m. UTC
Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK
even when its module is being unloaded.

Let's use SRCU to protect rtnl_af_ops.

rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns
SRCU-protected ops pointer.  The caller must call rtnl_af_put()
to release the pointer after the use.

Also, rtnl_af_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_SETLINK requests to
complete.

Note that rtnl_af_ops needs to be protected by its dedicated lock
when RTNL is removed.

Note also that BUG_ON() in do_setlink() is changed to the normal
error handling as a different af_ops might be found after
validate_linkmsg().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/rtnetlink.h |  5 +++-
 net/core/rtnetlink.c    | 58 +++++++++++++++++++++++++++++------------
 2 files changed, 46 insertions(+), 17 deletions(-)

Comments

Eric Dumazet Oct. 10, 2024, 1:16 p.m. UTC | #1
On Thu, Oct 10, 2024 at 1:21 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
> guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK
> even when its module is being unloaded.
>
> Let's use SRCU to protect rtnl_af_ops.
>
> rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns
> SRCU-protected ops pointer.  The caller must call rtnl_af_put()
> to release the pointer after the use.
>
> Also, rtnl_af_unregister() unlinks the ops first and calls
> synchronize_srcu() to wait for inflight RTM_SETLINK requests to
> complete.
>
> Note that rtnl_af_ops needs to be protected by its dedicated lock
> when RTNL is removed.
>
> Note also that BUG_ON() in do_setlink() is changed to the normal
> error handling as a different af_ops might be found after
> validate_linkmsg().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  include/net/rtnetlink.h |  5 +++-
>  net/core/rtnetlink.c    | 58 +++++++++++++++++++++++++++++------------
>  2 files changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index c873fd6193ed..407a2f56f00a 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -150,7 +150,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops);
>  /**
>   *     struct rtnl_af_ops - rtnetlink address family operations
>   *
> - *     @list: Used internally
> + *     @list: Used internally, protected by RTNL and SRCU
> + *     @srcu: Used internally
>   *     @family: Address family
>   *     @fill_link_af: Function to fill IFLA_AF_SPEC with address family
>   *                    specific netlink attributes.
> @@ -163,6 +164,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops);
>   */
>  struct rtnl_af_ops {
>         struct list_head        list;
> +       struct srcu_struct      srcu;
> +
>         int                     family;
>
>         int                     (*fill_link_af)(struct sk_buff *skb,
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index a0702e531331..817165f6d5ef 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -660,18 +660,31 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
>
>  static LIST_HEAD(rtnl_af_ops);
>
> -static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
> +static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index)
>  {
> -       const struct rtnl_af_ops *ops;
> +       struct rtnl_af_ops *ops;
>
>         ASSERT_RTNL();
>
> -       list_for_each_entry(ops, &rtnl_af_ops, list) {
> -               if (ops->family == family)
> -                       return ops;
> +       rcu_read_lock();
> +
> +       list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
> +               if (ops->family == family) {
> +                       *srcu_index = srcu_read_lock(&ops->srcu);
> +                       goto unlock;
> +               }
>         }
>
> -       return NULL;
> +       ops = NULL;
> +unlock:
> +       rcu_read_unlock();
> +
> +       return ops;
> +}
> +
> +static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index)
> +{
> +       srcu_read_unlock(&ops->srcu, srcu_index);
>  }
>
>  /**
> @@ -683,6 +696,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
>  void rtnl_af_register(struct rtnl_af_ops *ops)
>  {
>         rtnl_lock();
> +       init_srcu_struct(&ops->srcu);

Same remark, this could fail.
Kuniyuki Iwashima Oct. 10, 2024, 4:27 p.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 15:16:20 +0200
> > @@ -683,6 +696,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
> >  void rtnl_af_register(struct rtnl_af_ops *ops)
> >  {
> >         rtnl_lock();
> > +       init_srcu_struct(&ops->srcu);
> 
> Same remark, this could fail.

This requries rtnl_af_register() to return int, and that needs
few more changes conflicting with rtnl_register_many() changes.

I'll see if it'd be a bit noisy, and if so, I'll split this patch
to another series.

Thanks!
diff mbox series

Patch

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index c873fd6193ed..407a2f56f00a 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -150,7 +150,8 @@  void rtnl_link_unregister(struct rtnl_link_ops *ops);
 /**
  * 	struct rtnl_af_ops - rtnetlink address family operations
  *
- *	@list: Used internally
+ *	@list: Used internally, protected by RTNL and SRCU
+ *	@srcu: Used internally
  * 	@family: Address family
  * 	@fill_link_af: Function to fill IFLA_AF_SPEC with address family
  * 		       specific netlink attributes.
@@ -163,6 +164,8 @@  void rtnl_link_unregister(struct rtnl_link_ops *ops);
  */
 struct rtnl_af_ops {
 	struct list_head	list;
+	struct srcu_struct	srcu;
+
 	int			family;
 
 	int			(*fill_link_af)(struct sk_buff *skb,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a0702e531331..817165f6d5ef 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -660,18 +660,31 @@  static size_t rtnl_link_get_size(const struct net_device *dev)
 
 static LIST_HEAD(rtnl_af_ops);
 
-static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index)
 {
-	const struct rtnl_af_ops *ops;
+	struct rtnl_af_ops *ops;
 
 	ASSERT_RTNL();
 
-	list_for_each_entry(ops, &rtnl_af_ops, list) {
-		if (ops->family == family)
-			return ops;
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
+		if (ops->family == family) {
+			*srcu_index = srcu_read_lock(&ops->srcu);
+			goto unlock;
+		}
 	}
 
-	return NULL;
+	ops = NULL;
+unlock:
+	rcu_read_unlock();
+
+	return ops;
+}
+
+static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index)
+{
+	srcu_read_unlock(&ops->srcu, srcu_index);
 }
 
 /**
@@ -683,6 +696,7 @@  static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 void rtnl_af_register(struct rtnl_af_ops *ops)
 {
 	rtnl_lock();
+	init_srcu_struct(&ops->srcu);
 	list_add_tail_rcu(&ops->list, &rtnl_af_ops);
 	rtnl_unlock();
 }
@@ -699,6 +713,7 @@  void rtnl_af_unregister(struct rtnl_af_ops *ops)
 	rtnl_unlock();
 
 	synchronize_rcu();
+	synchronize_srcu(&ops->srcu);
 }
 EXPORT_SYMBOL_GPL(rtnl_af_unregister);
 
@@ -2571,20 +2586,24 @@  static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
 		int rem, err;
 
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
-			const struct rtnl_af_ops *af_ops;
+			struct rtnl_af_ops *af_ops;
+			int srcu_ops_index;
 
-			af_ops = rtnl_af_lookup(nla_type(af));
+			af_ops = rtnl_af_lookup(nla_type(af), &srcu_ops_index);
 			if (!af_ops)
 				return -EAFNOSUPPORT;
 
 			if (!af_ops->set_link_af)
-				return -EOPNOTSUPP;
-
-			if (af_ops->validate_link_af) {
+				err = -EOPNOTSUPP;
+			else if (af_ops->validate_link_af)
 				err = af_ops->validate_link_af(dev, af, extack);
-				if (err < 0)
-					return err;
-			}
+			else
+				err = 0;
+
+			rtnl_af_put(af_ops, srcu_ops_index);
+
+			if (err < 0)
+				return err;
 		}
 	}
 
@@ -3164,11 +3183,18 @@  static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
 		int rem;
 
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
-			const struct rtnl_af_ops *af_ops;
+			struct rtnl_af_ops *af_ops;
+			int srcu_ops_index;
 
-			BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
+			af_ops = rtnl_af_lookup(nla_type(af), &srcu_ops_index);
+			if (!af_ops) {
+				err = -EAFNOSUPPORT;
+				goto errout;
+			}
 
 			err = af_ops->set_link_af(dev, af, extack);
+			rtnl_af_put(af_ops, srcu_ops_index);
+
 			if (err < 0)
 				goto errout;