diff mbox series

[net-next,09/11] can: gw: switch cangw_pernet_exit() to batch mode

Message ID 20220207171756.1304544-10-eric.dumazet@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: speedup netns dismantles | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 6 this patch: 6
netdev/cc_maintainers warning 1 maintainers not CCed: linux-can@vger.kernel.org
netdev/build_clang fail Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 6 this patch: 6
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 22 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Feb. 7, 2022, 5:17 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

cleanup_net() is competing with other rtnl users.

Avoiding acquiring rtnl for each netns before calling
cgw_remove_all_jobs() gives cleanup_net() a chance
to progress much faster, holding rtnl a bit longer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/gw.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Oliver Hartkopp Feb. 7, 2022, 5:41 p.m. UTC | #1
On 07.02.22 18:17, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> cleanup_net() is competing with other rtnl users.
> 
> Avoiding acquiring rtnl for each netns before calling
> cgw_remove_all_jobs() gives cleanup_net() a chance
> to progress much faster, holding rtnl a bit longer.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Oliver Hartkopp <socketcan@hartkopp.net>
> Cc: Marc Kleine-Budde <mkl@pengutronix.de>
> ---
>   net/can/gw.c | 9 ++++++---
>   1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/net/can/gw.c b/net/can/gw.c
> index d8861e862f157aec36c417b71eb7e8f59bd064b9..24221352e059be9fb9aca3819be6a7ac4cdef144 100644
> --- a/net/can/gw.c
> +++ b/net/can/gw.c
> @@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
>   	return 0;
>   }
>   
> -static void __net_exit cangw_pernet_exit(struct net *net)
> +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
>   {
> +	struct net *net;
> +
>   	rtnl_lock();
> -	cgw_remove_all_jobs(net);
> +	list_for_each_entry(net, net_list, exit_list)
> +		cgw_remove_all_jobs(net);

Instead of removing the jobs for ONE net namespace it seems you are
removing the jobs for ALL net namespaces?

Looks wrong to me.

Best regards,
Oliver


>   	rtnl_unlock();
>   }
>   
>   static struct pernet_operations cangw_pernet_ops = {
>   	.init = cangw_pernet_init,
> -	.exit = cangw_pernet_exit,
> +	.exit_batch = cangw_pernet_exit_batch,
>   };
>   
>   static __init int cgw_module_init(void)
Eric Dumazet Feb. 7, 2022, 5:54 p.m. UTC | #2
On Mon, Feb 7, 2022 at 9:41 AM Oliver Hartkopp <socketcan@hartkopp.net> wrote:
>
>
>
> On 07.02.22 18:17, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > cleanup_net() is competing with other rtnl users.
> >
> > Avoiding acquiring rtnl for each netns before calling
> > cgw_remove_all_jobs() gives cleanup_net() a chance
> > to progress much faster, holding rtnl a bit longer.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Cc: Oliver Hartkopp <socketcan@hartkopp.net>
> > Cc: Marc Kleine-Budde <mkl@pengutronix.de>
> > ---
> >   net/can/gw.c | 9 ++++++---
> >   1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/can/gw.c b/net/can/gw.c
> > index d8861e862f157aec36c417b71eb7e8f59bd064b9..24221352e059be9fb9aca3819be6a7ac4cdef144 100644
> > --- a/net/can/gw.c
> > +++ b/net/can/gw.c
> > @@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
> >       return 0;
> >   }
> >
> > -static void __net_exit cangw_pernet_exit(struct net *net)
> > +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
> >   {
> > +     struct net *net;
> > +
> >       rtnl_lock();
> > -     cgw_remove_all_jobs(net);
> > +     list_for_each_entry(net, net_list, exit_list)
> > +             cgw_remove_all_jobs(net);
>
> Instead of removing the jobs for ONE net namespace it seems you are
> removing the jobs for ALL net namespaces?
>
> Looks wrong to me.

I see nothing wrong in my patch.

I think you have to look more closely at ops_exit_list() in
net/core/net_namespace.c


BTW, the synchronize_rcu() call in cgw_remove_all_jobs is definitely
bad; you should absolutely replace it with call_rcu() or kfree_rcu()

>
> Best regards,
> Oliver
>
>
> >       rtnl_unlock();
> >   }
> >
> >   static struct pernet_operations cangw_pernet_ops = {
> >       .init = cangw_pernet_init,
> > -     .exit = cangw_pernet_exit,
> > +     .exit_batch = cangw_pernet_exit_batch,
> >   };
> >
> >   static __init int cgw_module_init(void)
Oliver Hartkopp Feb. 7, 2022, 6:39 p.m. UTC | #3
On 07.02.22 18:54, Eric Dumazet wrote:
> On Mon, Feb 7, 2022 at 9:41 AM Oliver Hartkopp <socketcan@hartkopp.net> wrote:
(..)
>>> -static void __net_exit cangw_pernet_exit(struct net *net)
>>> +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
>>>    {
>>> +     struct net *net;
>>> +
>>>        rtnl_lock();
>>> -     cgw_remove_all_jobs(net);
>>> +     list_for_each_entry(net, net_list, exit_list)
>>> +             cgw_remove_all_jobs(net);
>>
>> Instead of removing the jobs for ONE net namespace it seems you are
>> removing the jobs for ALL net namespaces?
>>
>> Looks wrong to me.
> 
> I see nothing wrong in my patch.
> 
> I think you have to look more closely at ops_exit_list() in
> net/core/net_namespace.c

Ok, thanks. Your patch just moved the list_for_each_entry() to gw.c.
So there is no functional difference.

Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>

> BTW, the synchronize_rcu() call in cgw_remove_all_jobs is definitely
> bad; you should absolutely replace it with call_rcu() or kfree_rcu()

Advice is welcome!

The synchronize_rcu() has been introduced in fb8696ab14ad ("can: gw: 
synchronize rcu operations before removing gw job entry") as 
can_can_gw_rcv() is called under RCU protection (NET_RX softirq).

That patch was a follow-up to d5f9023fa61e ("can: bcm: delay release of 
struct bcm_op after synchronize_rcu()") where Thadeu Lima de Souza 
Cascardo detected a race in the BCM code.

If call_rcu() is enough to make sure we do not get a race in
can_can_gw_rcv() while receiving skbs and removing filters with
cgw_unregister_filter(), I would be happy to see this rcu thing fixed up.

Best regards,
Oliver
Eric Dumazet Feb. 7, 2022, 6:50 p.m. UTC | #4
On Mon, Feb 7, 2022 at 10:40 AM Oliver Hartkopp <socketcan@hartkopp.net> wrote:
>
>
>
> On 07.02.22 18:54, Eric Dumazet wrote:
> > On Mon, Feb 7, 2022 at 9:41 AM Oliver Hartkopp <socketcan@hartkopp.net> wrote:
> (..)
> >>> -static void __net_exit cangw_pernet_exit(struct net *net)
> >>> +static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
> >>>    {
> >>> +     struct net *net;
> >>> +
> >>>        rtnl_lock();
> >>> -     cgw_remove_all_jobs(net);
> >>> +     list_for_each_entry(net, net_list, exit_list)
> >>> +             cgw_remove_all_jobs(net);
> >>
> >> Instead of removing the jobs for ONE net namespace it seems you are
> >> removing the jobs for ALL net namespaces?
> >>
> >> Looks wrong to me.
> >
> > I see nothing wrong in my patch.
> >
> > I think you have to look more closely at ops_exit_list() in
> > net/core/net_namespace.c
>
> Ok, thanks. Your patch just moved the list_for_each_entry() to gw.c.
> So there is no functional difference.
>
> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>
> > BTW, the synchronize_rcu() call in cgw_remove_all_jobs is definitely
> > bad; you should absolutely replace it with call_rcu() or kfree_rcu()
>
> Advise is welcome!
>
> The synchronize_rcu() has been introduced in fb8696ab14ad ("can: gw:
> synchronize rcu operations before removing gw job entry") as
> can_can_gw_rcv() is called under RCU protection (NET_RX softirq).
>
> That patch was a follow-up to d5f9023fa61e ("can: bcm: delay release of
> struct bcm_op after synchronize_rcu()") where Thadeu Lima de Souza
> Cascardo detected a race in the BCM code.
>
> If call_rcu() is enough to make sure we do not get a race in
> can_can_gw_rcv() while receiving skbs and removing filters with
> cgw_unregister_filter(), I would be happy to see this rcu thing fixed up.
>

Can you test this straightforward patch, thanks !
diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f157aec36c417b71eb7e8f59bd064b9..20e74fe7d0906dccc65732b8f9e7e14e2d1192c3
100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -577,6 +577,13 @@ static inline void cgw_unregister_filter(struct
net *net, struct cgw_job *gwj)
                          gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
 }

+static void cgw_job_free_rcu(struct rcu_head *rcu_head)
+{
+       struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu);
+
+       kmem_cache_free(cgw_cache, gwj);
+}
+
 static int cgw_notifier(struct notifier_block *nb,
                        unsigned long msg, void *ptr)
 {
@@ -596,8 +603,7 @@ static int cgw_notifier(struct notifier_block *nb,
                        if (gwj->src.dev == dev || gwj->dst.dev == dev) {
                                hlist_del(&gwj->list);
                                cgw_unregister_filter(net, gwj);
-                               synchronize_rcu();
-                               kmem_cache_free(cgw_cache, gwj);
+                               call_rcu(&gwj->rcu, cgw_job_free_rcu);
                        }
                }
        }
@@ -1155,8 +1161,7 @@ static void cgw_remove_all_jobs(struct net *net)
        hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
                hlist_del(&gwj->list);
                cgw_unregister_filter(net, gwj);
-               synchronize_rcu();
-               kmem_cache_free(cgw_cache, gwj);
+               call_rcu(&gwj->rcu, cgw_job_free_rcu);
        }
 }

@@ -1224,8 +1229,7 @@ static int cgw_remove_job(struct sk_buff *skb,
struct nlmsghdr *nlh,

                hlist_del(&gwj->list);
                cgw_unregister_filter(net, gwj);
-               synchronize_rcu();
-               kmem_cache_free(cgw_cache, gwj);
+               call_rcu(&gwj->rcu, cgw_job_free_rcu);
                err = 0;
                break;
        }
diff mbox series

Patch

diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f157aec36c417b71eb7e8f59bd064b9..24221352e059be9fb9aca3819be6a7ac4cdef144 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1239,16 +1239,19 @@  static int __net_init cangw_pernet_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
 {
+	struct net *net;
+
 	rtnl_lock();
-	cgw_remove_all_jobs(net);
+	list_for_each_entry(net, net_list, exit_list)
+		cgw_remove_all_jobs(net);
 	rtnl_unlock();
 }
 
 static struct pernet_operations cangw_pernet_ops = {
 	.init = cangw_pernet_init,
-	.exit = cangw_pernet_exit,
+	.exit_batch = cangw_pernet_exit_batch,
 };
 
 static __init int cgw_module_init(void)