Message ID | 20240226155055.1141336-14-edumazet@google.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | ipv6: lockless accesses to devconf | expand |
Mon, Feb 26, 2024 at 04:50:55PM CET, edumazet@google.com wrote: >1) inet6_netconf_dump_devconf() can run under RCU protection > instead of RTNL. > >2) properly return 0 at the end of a dump, avoiding an > an extra recvmsg() system call. > >3) Do not use inet6_base_seq() anymore, for_each_netdev_dump() > has nice properties. Restarting a GETDEVCONF dump if a device has > been added/removed or if net->ipv6.dev_addr_genid has changed is moot. > >Signed-off-by: Eric Dumazet <edumazet@google.com> >--- > net/ipv6/addrconf.c | 103 +++++++++++++++++++------------------------- > 1 file changed, 44 insertions(+), 59 deletions(-) > >diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c >index 74c33b132073934290632a953bce8ee6a5124ca9..08b4728733e3ed16d139d2bd4b50328552b3c27f 100644 >--- a/net/ipv6/addrconf.c >+++ b/net/ipv6/addrconf.c >@@ -727,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) > return res; > } > >- > static int inet6_netconf_dump_devconf(struct sk_buff *skb, > struct netlink_callback *cb) > { > const struct nlmsghdr *nlh = cb->nlh; > struct net *net = sock_net(skb->sk); >- int h, s_h; >- int idx, s_idx; >+ struct { >+ unsigned long ifindex; >+ unsigned int all_default; >+ } *ctx = (void *)cb->ctx; > struct net_device *dev; > struct inet6_dev *idev; >- struct hlist_head *head; >+ int err = 0; > > if (cb->strict_check) { > struct netlink_ext_ack *extack = cb->extack; >@@ -754,64 +755,47 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, > } > } > >- s_h = cb->args[0]; >- s_idx = idx = cb->args[1]; >- >- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { >- idx = 0; >- head = &net->dev_index_head[h]; >- rcu_read_lock(); >- cb->seq = inet6_base_seq(net); >- hlist_for_each_entry_rcu(dev, head, index_hlist) { >- if (idx < s_idx) >- goto cont; >- idev = __in6_dev_get(dev); >- if (!idev) >- goto cont; >- >- if (inet6_netconf_fill_devconf(skb, dev->ifindex, >- &idev->cnf, >- NETLINK_CB(cb->skb).portid, >- nlh->nlmsg_seq, >- RTM_NEWNETCONF, >- NLM_F_MULTI, >- NETCONFA_ALL) < 0) { >- rcu_read_unlock(); >- goto done; >- } >- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); >-cont: >- idx++; >- } >- rcu_read_unlock(); >+ rcu_read_lock(); >+ for_each_netdev_dump(net, dev, ctx->ifindex) { >+ idev = __in6_dev_get(dev); >+ if (!idev) >+ continue; >+ err = inet6_netconf_fill_devconf(skb, dev->ifindex, >+ &idev->cnf, >+ NETLINK_CB(cb->skb).portid, >+ nlh->nlmsg_seq, >+ RTM_NEWNETCONF, >+ NLM_F_MULTI, >+ NETCONFA_ALL); >+ if (err < 0) >+ goto done; > } >- if (h == NETDEV_HASHENTRIES) { >- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, >- net->ipv6.devconf_all, >- NETLINK_CB(cb->skb).portid, >- nlh->nlmsg_seq, >- RTM_NEWNETCONF, NLM_F_MULTI, >- NETCONFA_ALL) < 0) >+ if (ctx->all_default == 0) { >+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, >+ net->ipv6.devconf_all, >+ NETLINK_CB(cb->skb).portid, >+ nlh->nlmsg_seq, >+ RTM_NEWNETCONF, NLM_F_MULTI, >+ NETCONFA_ALL); >+ if (err < 0) > goto done; >- else >- h++; >- } >- if (h == NETDEV_HASHENTRIES + 1) { >- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, >- net->ipv6.devconf_dflt, >- NETLINK_CB(cb->skb).portid, >- nlh->nlmsg_seq, >- RTM_NEWNETCONF, NLM_F_MULTI, >- NETCONFA_ALL) < 0) >+ ctx->all_default++; >+ } >+ if (ctx->all_default == 1) { >+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, >+ net->ipv6.devconf_dflt, >+ NETLINK_CB(cb->skb).portid, >+ nlh->nlmsg_seq, >+ RTM_NEWNETCONF, NLM_F_MULTI, >+ NETCONFA_ALL); >+ if (err < 0) > goto done; >- else >- h++; >+ ctx->all_default++; > } >-done: >- cb->args[0] = h; >- cb->args[1] = idx; >- >- return skb->len; >+done: if (err < 0 && likely(skb->len)) It is common to not mix label and other statement on the same line, could you split? Otherwise the patch and the set looks good to me. Thanks! >+ err = skb->len; >+ rcu_read_unlock(); >+ return err; > } > > #ifdef CONFIG_SYSCTL >@@ -7503,7 +7487,8 @@ int __init addrconf_init(void) > err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, > inet6_netconf_get_devconf, > inet6_netconf_dump_devconf, >- RTNL_FLAG_DOIT_UNLOCKED); >+ RTNL_FLAG_DOIT_UNLOCKED | >+ RTNL_FLAG_DUMP_UNLOCKED); > if (err < 0) > goto errout; > err = ipv6_addr_label_rtnl_register(); >-- >2.44.0.rc1.240.g4c46232300-goog > >
On Mon, Feb 26, 2024 at 5:55 PM Jiri Pirko <jiri@resnulli.us> wrote: > > Mon, Feb 26, 2024 at 04:50:55PM CET, edumazet@google.com wrote: > >1) inet6_netconf_dump_devconf() can run under RCU protection > > instead of RTNL. > > >+ ctx->all_default++; > > } > >-done: > >- cb->args[0] = h; > >- cb->args[1] = idx; > >- > >- return skb->len; > >+done: if (err < 0 && likely(skb->len)) > > It is common to not mix label and other statement on the same line, > could you split? Sure thing ! > > Otherwise the patch and the set looks good to me. Thanks! > >
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 74c33b132073934290632a953bce8ee6a5124ca9..08b4728733e3ed16d139d2bd4b50328552b3c27f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -727,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) return res; } - static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -754,64 +755,47 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet6_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (inet6_netconf_fill_devconf(skb, dev->ifindex, - &idev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv6.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } -done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; +done: if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -7503,7 +7487,8 @@ int __init addrconf_init(void) err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register();
1) inet6_netconf_dump_devconf() can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. 3) Do not use inet6_base_seq() anymore, for_each_netdev_dump() has nice properties. Restarting a GETDEVCONF dump if a device has been added/removed or if net->ipv6.dev_addr_genid has changed is moot. Signed-off-by: Eric Dumazet <edumazet@google.com> --- net/ipv6/addrconf.c | 103 +++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 59 deletions(-)