diff mbox series

[v1,net-next,07/13] rtnetlink: Protect struct rtnl_link_ops with SRCU.

Message ID 20241009231656.57830-8-kuniyu@amazon.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series rtnetlink: Refactor rtnl_{new,del,set}link() for per-netns RTNL. | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 19 this patch: 19
netdev/build_tools success Errors and warnings before: 0 (+2) this patch: 0 (+2)
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 43 this patch: 43
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 2885 this patch: 2887
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0

Commit Message

Kuniyuki Iwashima Oct. 9, 2024, 11:16 p.m. UTC
Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_link_ops stays alive during an in-flight
RTM_NEWLINK request even when its module is being unloaded.

Let's use SRCU to protect rtnl_link_ops.

rtnl_link_ops_get() now iterates link_ops under RCU and returns an
SRCU-protected ops pointer.  The caller must call rtnl_link_ops_put()
to release the pointer after use.

Also, __rtnl_link_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_NEWLINK requests to
complete.

Note that link_ops will need to be protected by its own dedicated
lock once RTNL is removed.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/rtnetlink.h |  5 ++-
 net/core/rtnetlink.c    | 78 +++++++++++++++++++++++++++++------------
 2 files changed, 60 insertions(+), 23 deletions(-)

Comments

Eric Dumazet Oct. 10, 2024, 1:02 p.m. UTC | #1
On Thu, Oct 10, 2024 at 1:19 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
> guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK
> even when its module is being unloaded.
>
> Let's use SRCU to protect rtnl_link_ops.
>
> rtnl_link_ops_get() now iterates link_ops under RCU and returns
> SRCU-protected ops pointer.  The caller must call rtnl_link_ops_put()
> to release the pointer after the use.
>
> Also, __rtnl_link_unregister() unlinks the ops first and calls
> synchronize_srcu() to wait for inflight RTM_NEWLINK requests to
> complete.
>
> Note that link_ops needs to be protected by its dedicated lock
> when RTNL is removed.
>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  include/net/rtnetlink.h |  5 ++-
>  net/core/rtnetlink.c    | 78 +++++++++++++++++++++++++++++------------
>  2 files changed, 60 insertions(+), 23 deletions(-)
>
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index b45d57b5968a..c873fd6193ed 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -3,6 +3,7 @@
>  #define __NET_RTNETLINK_H
>
>  #include <linux/rtnetlink.h>
> +#include <linux/srcu.h>
>  #include <net/netlink.h>
>
>  typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
> @@ -47,7 +48,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
>  /**
>   *     struct rtnl_link_ops - rtnetlink link operations
>   *
> - *     @list: Used internally
> + *     @list: Used internally, protected by RTNL and SRCU
> + *     @srcu: Used internally
>   *     @kind: Identifier
>   *     @netns_refund: Physical device, move to init_net on netns exit
>   *     @maxtype: Highest device specific netlink attribute number
> @@ -78,6 +80,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
>   */
>  struct rtnl_link_ops {
>         struct list_head        list;
> +       struct srcu_struct      srcu;
>
>         const char              *kind;
>
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 24545c5b7e48..7f464554d881 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -456,15 +456,29 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
>
>  static LIST_HEAD(link_ops);
>
> -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
> +static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
>  {
> -       const struct rtnl_link_ops *ops;
> +       struct rtnl_link_ops *ops;
>
> -       list_for_each_entry(ops, &link_ops, list) {
> -               if (!strcmp(ops->kind, kind))
> -                       return ops;
> +       rcu_read_lock();
> +
> +       list_for_each_entry_rcu(ops, &link_ops, list) {
> +               if (!strcmp(ops->kind, kind)) {
> +                       *srcu_index = srcu_read_lock(&ops->srcu);
> +                       goto unlock;
> +               }
>         }
> -       return NULL;
> +
> +       ops = NULL;
> +unlock:
> +       rcu_read_unlock();
> +
> +       return ops;
> +}
> +
> +static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
> +{
> +       srcu_read_unlock(&ops->srcu, srcu_index);
>  }
>
>  /**
> @@ -479,8 +493,15 @@ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
>   */
>  int __rtnl_link_register(struct rtnl_link_ops *ops)
>  {
> -       if (rtnl_link_ops_get(ops->kind))
> -               return -EEXIST;
> +       struct rtnl_link_ops *tmp;
> +
> +       /* When RTNL is removed, add lock for link_ops. */
> +       ASSERT_RTNL();
> +
> +       list_for_each_entry(tmp, &link_ops, list) {
> +               if (!strcmp(ops->kind, tmp->kind))
> +                       return -EEXIST;
> +       }
>
>         /* The check for alloc/setup is here because if ops
>          * does not have that filled up, it is not possible
> @@ -490,7 +511,9 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
>         if ((ops->alloc || ops->setup) && !ops->dellink)
>                 ops->dellink = unregister_netdevice_queue;
>
> -       list_add_tail(&ops->list, &link_ops);
> +       init_srcu_struct(&ops->srcu);

init_srcu_struct() could fail.

> +       list_add_tail_rcu(&ops->list, &link_ops);
> +
>         return 0;
Kuniyuki Iwashima Oct. 10, 2024, 4:20 p.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2024 15:02:39 +0200
> > @@ -490,7 +511,9 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
> >         if ((ops->alloc || ops->setup) && !ops->dellink)
> >                 ops->dellink = unregister_netdevice_queue;
> >
> > -       list_add_tail(&ops->list, &link_ops);
> > +       init_srcu_struct(&ops->srcu);
> 
> init_srcu_struct() could fail.

Oh, I somehow assumed init wouldn't fail.
Will fix in v2.

Thanks!
diff mbox series

Patch

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b45d57b5968a..c873fd6193ed 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -3,6 +3,7 @@ 
 #define __NET_RTNETLINK_H
 
 #include <linux/rtnetlink.h>
+#include <linux/srcu.h>
 #include <net/netlink.h>
 
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
@@ -47,7 +48,8 @@  static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
 /**
  *	struct rtnl_link_ops - rtnetlink link operations
  *
- *	@list: Used internally
+ *	@list: Used internally, protected by RTNL and SRCU
+ *	@srcu: Used internally
  *	@kind: Identifier
  *	@netns_refund: Physical device, move to init_net on netns exit
  *	@maxtype: Highest device specific netlink attribute number
@@ -78,6 +80,7 @@  static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  */
 struct rtnl_link_ops {
 	struct list_head	list;
+	struct srcu_struct	srcu;
 
 	const char		*kind;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 24545c5b7e48..7f464554d881 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -456,15 +456,29 @@  EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
 static LIST_HEAD(link_ops);
 
-static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
 {
-	const struct rtnl_link_ops *ops;
+	struct rtnl_link_ops *ops;
 
-	list_for_each_entry(ops, &link_ops, list) {
-		if (!strcmp(ops->kind, kind))
-			return ops;
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(ops, &link_ops, list) {
+		if (!strcmp(ops->kind, kind)) {
+			*srcu_index = srcu_read_lock(&ops->srcu);
+			goto unlock;
+		}
 	}
-	return NULL;
+
+	ops = NULL;
+unlock:
+	rcu_read_unlock();
+
+	return ops;
+}
+
+static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
+{
+	srcu_read_unlock(&ops->srcu, srcu_index);
 }
 
 /**
@@ -479,8 +493,15 @@  static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
  */
 int __rtnl_link_register(struct rtnl_link_ops *ops)
 {
-	if (rtnl_link_ops_get(ops->kind))
-		return -EEXIST;
+	struct rtnl_link_ops *tmp;
+
+	/* When RTNL is removed, add lock for link_ops. */
+	ASSERT_RTNL();
+
+	list_for_each_entry(tmp, &link_ops, list) {
+		if (!strcmp(ops->kind, tmp->kind))
+			return -EEXIST;
+	}
 
 	/* The check for alloc/setup is here because if ops
 	 * does not have that filled up, it is not possible
@@ -490,7 +511,9 @@  int __rtnl_link_register(struct rtnl_link_ops *ops)
 	if ((ops->alloc || ops->setup) && !ops->dellink)
 		ops->dellink = unregister_netdevice_queue;
 
-	list_add_tail(&ops->list, &link_ops);
+	init_srcu_struct(&ops->srcu);
+	list_add_tail_rcu(&ops->list, &link_ops);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_register);
@@ -541,10 +564,11 @@  void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	struct net *net;
 
-	for_each_net(net) {
+	list_del_rcu(&ops->list);
+	synchronize_srcu(&ops->srcu);
+
+	for_each_net(net)
 		__rtnl_kill_links(net, ops);
-	}
-	list_del(&ops->list);
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
 
@@ -2157,10 +2181,11 @@  static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
 	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
 };
 
-static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
+						  int *ops_srcu_index)
 {
-	const struct rtnl_link_ops *ops = NULL;
 	struct nlattr *linfo[IFLA_INFO_MAX + 1];
+	struct rtnl_link_ops *ops = NULL;
 
 	if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
 		return NULL;
@@ -2169,7 +2194,7 @@  static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
 		char kind[MODULE_NAME_LEN];
 
 		nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
-		ops = rtnl_link_ops_get(kind);
+		ops = rtnl_link_ops_get(kind, ops_srcu_index);
 	}
 
 	return ops;
@@ -2289,8 +2314,8 @@  static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
 
 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	const struct rtnl_link_ops *kind_ops = NULL;
 	struct netlink_ext_ack *extack = cb->extack;
+	struct rtnl_link_ops *kind_ops = NULL;
 	const struct nlmsghdr *nlh = cb->nlh;
 	struct net *net = sock_net(skb->sk);
 	unsigned int flags = NLM_F_MULTI;
@@ -2301,6 +2326,7 @@  static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *tgt_net = net;
 	u32 ext_filter_mask = 0;
 	struct net_device *dev;
+	int ops_srcu_index;
 	int master_idx = 0;
 	int netnsid = -1;
 	int err, i;
@@ -2334,7 +2360,7 @@  static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			master_idx = nla_get_u32(tb[i]);
 			break;
 		case IFLA_LINKINFO:
-			kind_ops = linkinfo_to_kind_ops(tb[i]);
+			kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index);
 			break;
 		default:
 			if (cb->strict_check) {
@@ -2360,6 +2386,10 @@  static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 		if (err < 0)
 			break;
 	}
+
+	if (kind_ops)
+		rtnl_link_ops_put(kind_ops, ops_srcu_index);
+
 	cb->seq = tgt_net->dev_base_seq;
 	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 	if (netnsid >= 0)
@@ -3746,8 +3776,9 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 			struct netlink_ext_ack *extack)
 {
 	struct nlattr **tb, **linkinfo, **data = NULL;
-	const struct rtnl_link_ops *ops = NULL;
+	struct rtnl_link_ops *ops = NULL;
 	struct rtnl_newlink_tbs *tbs;
+	int ops_srcu_index;
 	int ret;
 
 	tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
@@ -3779,13 +3810,13 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		char kind[MODULE_NAME_LEN];
 
 		nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
-		ops = rtnl_link_ops_get(kind);
+		ops = rtnl_link_ops_get(kind, &ops_srcu_index);
 #ifdef CONFIG_MODULES
 		if (!ops) {
 			__rtnl_unlock();
 			request_module("rtnl-link-%s", kind);
 			rtnl_lock();
-			ops = rtnl_link_ops_get(kind);
+			ops = rtnl_link_ops_get(kind, &ops_srcu_index);
 		}
 #endif
 	}
@@ -3799,7 +3830,7 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 							  linkinfo[IFLA_INFO_DATA],
 							  ops->policy, extack);
 			if (ret < 0)
-				goto free;
+				goto put_ops;
 
 			data = tbs->attr;
 		}
@@ -3807,12 +3838,15 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		if (ops->validate) {
 			ret = ops->validate(tb, data, extack);
 			if (ret < 0)
-				goto free;
+				goto put_ops;
 		}
 	}
 
 	ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack);
 
+put_ops:
+	if (ops)
+		rtnl_link_ops_put(ops, ops_srcu_index);
 free:
 	kfree(tbs);
 	return ret;