diff mbox series

[v1,net-next,3/6] af_unix: Define a per-netns hash table.

Message ID 20220616234714.4291-4-kuniyu@amazon.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series af_unix: Introduce per-netns socket hash table. | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4833 this patch: 4833
netdev/cc_maintainers warning 1 maintainers not CCed: viro@zeniv.linux.org.uk
netdev/build_clang success Errors and warnings before: 1190 this patch: 1190
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4983 this patch: 4983
netdev/checkpatch warning CHECK: spinlock_t definition without comment WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kuniyuki Iwashima June 16, 2022, 11:47 p.m. UTC
This commit adds a per netns hash table for AF_UNIX.

Note that its size is fixed as UNIX_HASH_SIZE for now.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/af_unix.h    |  5 +++++
 include/net/netns/unix.h |  2 ++
 net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
 3 files changed, 41 insertions(+), 6 deletions(-)

Comments

Eric Dumazet June 17, 2022, 4:23 a.m. UTC | #1
On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> This commit adds a per netns hash table for AF_UNIX.
>
> Note that its size is fixed as UNIX_HASH_SIZE for now.
>

Note: Please include memory costs for this table, including when LOCKDEP is on.


> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  include/net/af_unix.h    |  5 +++++
>  include/net/netns/unix.h |  2 ++
>  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
>  3 files changed, 41 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index acb56e463db1..0a17e49af0c9 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
>  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
>  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
>
> +struct unix_hashbucket {
> +       spinlock_t              lock;
> +       struct hlist_head       head;
> +};
> +
>  struct unix_address {
>         refcount_t      refcnt;
>         int             len;
> diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> index 91a3d7e39198..975c4e3f8a5b 100644
> --- a/include/net/netns/unix.h
> +++ b/include/net/netns/unix.h
> @@ -5,8 +5,10 @@
>  #ifndef __NETNS_UNIX_H__
>  #define __NETNS_UNIX_H__
>
> +struct unix_hashbucket;
>  struct ctl_table_header;
>  struct netns_unix {
> +       struct unix_hashbucket  *hash;
>         int                     sysctl_max_dgram_qlen;
>         struct ctl_table_header *ctl;
>  };
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index c0804ae9c96a..3c07702e2349 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
>
>  static int __net_init unix_net_init(struct net *net)
>  {
> -       int error = -ENOMEM;
> +       int i;
>
>         net->unx.sysctl_max_dgram_qlen = 10;
>         if (unix_sysctl_register(net))
> @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
>
>  #ifdef CONFIG_PROC_FS
>         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> -                       sizeof(struct seq_net_private))) {
> -               unix_sysctl_unregister(net);
> -               goto out;
> +                            sizeof(struct seq_net_private)))
> +               goto err_sysctl;
> +#endif
> +
> +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> +                               GFP_KERNEL);

This will fail under memory pressure.

Prefer kvmalloc_array()

> +       if (!net->unx.hash)
> +               goto err_proc;
> +
> +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> +               spin_lock_init(&net->unx.hash[i].lock);
>         }
> +
> +       return 0;
> +
> +err_proc:
> +#ifdef CONFIG_PROC_FS
> +       remove_proc_entry("unix", net->proc_net);
>  #endif
> -       error = 0;
> +err_sysctl:
> +       unix_sysctl_unregister(net);
>  out:
> -       return error;
> +       return -ENOMEM;
>  }
>
>  static void __net_exit unix_net_exit(struct net *net)
>  {
> +       kfree(net->unx.hash);

kvfree()

>         unix_sysctl_unregister(net);
>         remove_proc_entry("unix", net->proc_net);
>  }
> @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
>
>         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
>
> +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> +                                   GFP_KERNEL);

Why are you allocating the hash table twice ? It should be done
already in  unix_net_init() ?

> +       if (!init_net.unx.hash)
> +               goto out;
> +
> +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> +               spin_lock_init(&init_net.unx.hash[i].lock);
> +       }
> +
>         for (i = 0; i < UNIX_HASH_SIZE; i++)
>                 spin_lock_init(&unix_table_locks[i]);
>
> @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
>         proto_unregister(&unix_dgram_proto);
>         proto_unregister(&unix_stream_proto);
>         unregister_pernet_subsys(&unix_net_ops);
> +       kfree(init_net.unx.hash);

   Not needed.

>  }
>
>  /* Earlier than device_initcall() so that other drivers invoking
> --
> 2.30.2
>
Kuniyuki Iwashima June 17, 2022, 5:33 a.m. UTC | #2
From:   Eric Dumazet <edumazet@google.com>
Date:   Fri, 17 Jun 2022 06:23:37 +0200
> On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > This commit adds a per netns hash table for AF_UNIX.
> >
> > Note that its size is fixed as UNIX_HASH_SIZE for now.
> >
> 
> Note: Please include memory costs for this table, including when LOCKDEP is on.

I'm sorry but I'm not quite sure.
Do you mean the table should have the size as member of its struct?
Could you elaborate on memory costs and LOCKDEP?


> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> >  include/net/af_unix.h    |  5 +++++
> >  include/net/netns/unix.h |  2 ++
> >  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
> >  3 files changed, 41 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > index acb56e463db1..0a17e49af0c9 100644
> > --- a/include/net/af_unix.h
> > +++ b/include/net/af_unix.h
> > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
> >  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
> >  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
> >
> > +struct unix_hashbucket {
> > +       spinlock_t              lock;
> > +       struct hlist_head       head;
> > +};
> > +
> >  struct unix_address {
> >         refcount_t      refcnt;
> >         int             len;
> > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> > index 91a3d7e39198..975c4e3f8a5b 100644
> > --- a/include/net/netns/unix.h
> > +++ b/include/net/netns/unix.h
> > @@ -5,8 +5,10 @@
> >  #ifndef __NETNS_UNIX_H__
> >  #define __NETNS_UNIX_H__
> >
> > +struct unix_hashbucket;
> >  struct ctl_table_header;
> >  struct netns_unix {
> > +       struct unix_hashbucket  *hash;
> >         int                     sysctl_max_dgram_qlen;
> >         struct ctl_table_header *ctl;
> >  };
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index c0804ae9c96a..3c07702e2349 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
> >
> >  static int __net_init unix_net_init(struct net *net)
> >  {
> > -       int error = -ENOMEM;
> > +       int i;
> >
> >         net->unx.sysctl_max_dgram_qlen = 10;
> >         if (unix_sysctl_register(net))
> > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
> >
> >  #ifdef CONFIG_PROC_FS
> >         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> > -                       sizeof(struct seq_net_private))) {
> > -               unix_sysctl_unregister(net);
> > -               goto out;
> > +                            sizeof(struct seq_net_private)))
> > +               goto err_sysctl;
> > +#endif
> > +
> > +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > +                               GFP_KERNEL);
> 
> This will fail under memory pressure.
> 
> Prefer kvmalloc_array()

Thank you for feedback!
I will use it in v2.


> > +       if (!net->unx.hash)
> > +               goto err_proc;
> > +
> > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> > +               spin_lock_init(&net->unx.hash[i].lock);
> >         }
> > +
> > +       return 0;
> > +
> > +err_proc:
> > +#ifdef CONFIG_PROC_FS
> > +       remove_proc_entry("unix", net->proc_net);
> >  #endif
> > -       error = 0;
> > +err_sysctl:
> > +       unix_sysctl_unregister(net);
> >  out:
> > -       return error;
> > +       return -ENOMEM;
> >  }
> >
> >  static void __net_exit unix_net_exit(struct net *net)
> >  {
> > +       kfree(net->unx.hash);
> 
> kvfree()
> 
> >         unix_sysctl_unregister(net);
> >         remove_proc_entry("unix", net->proc_net);
> >  }
> > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
> >
> >         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
> >
> > +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > +                                   GFP_KERNEL);
> 
> Why are you allocating the hash table twice ? It should be done
> already in  unix_net_init() ?

Ah sorry, just my mistake.
I'll remove this alloc/free part.


> > +       if (!init_net.unx.hash)
> > +               goto out;
> > +
> > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> > +               spin_lock_init(&init_net.unx.hash[i].lock);
> > +       }
> > +
> >         for (i = 0; i < UNIX_HASH_SIZE; i++)
> >                 spin_lock_init(&unix_table_locks[i]);
> >
> > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
> >         proto_unregister(&unix_dgram_proto);
> >         proto_unregister(&unix_stream_proto);
> >         unregister_pernet_subsys(&unix_net_ops);
> > +       kfree(init_net.unx.hash);
> 
>    Not needed.
> 
> >  }
> >
> >  /* Earlier than device_initcall() so that other drivers invoking
> > --
> > 2.30.2
> >


Best regards,
Kuniyuki
Eric Dumazet June 17, 2022, 6:08 a.m. UTC | #3
On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> From:   Eric Dumazet <edumazet@google.com>
> Date:   Fri, 17 Jun 2022 06:23:37 +0200
> > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > >
> > > This commit adds a per netns hash table for AF_UNIX.
> > >
> > > Note that its size is fixed as UNIX_HASH_SIZE for now.
> > >
> >
> > Note: Please include memory costs for this table, including when LOCKDEP is on.
>
> I'm sorry but I'm not quite sure.
> Do you mean the table should have the size as member of its struct?
> Could you elaborate on memory costs and LOCKDEP?

I am saying that instead of two separate arrays, you are now using one
array, with holes in the structure

Without LOCKDEP, sizeof(spinlock_t) is 4.
With LOCKDEP, sizeof(spinlock_t) is bigger.

So we are trading some costs of having two shared dense arrays, and
having per-netns hash tables.

It would be nice to mention this trade off in the changelog, because
some hosts have thousands of netns and few af_unix sockets :/

>
>
> > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > ---
> > >  include/net/af_unix.h    |  5 +++++
> > >  include/net/netns/unix.h |  2 ++
> > >  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
> > >  3 files changed, 41 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > > index acb56e463db1..0a17e49af0c9 100644
> > > --- a/include/net/af_unix.h
> > > +++ b/include/net/af_unix.h
> > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
> > >  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
> > >  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
> > >
> > > +struct unix_hashbucket {
> > > +       spinlock_t              lock;
> > > +       struct hlist_head       head;
> > > +};
> > > +
> > >  struct unix_address {
> > >         refcount_t      refcnt;
> > >         int             len;
> > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> > > index 91a3d7e39198..975c4e3f8a5b 100644
> > > --- a/include/net/netns/unix.h
> > > +++ b/include/net/netns/unix.h
> > > @@ -5,8 +5,10 @@
> > >  #ifndef __NETNS_UNIX_H__
> > >  #define __NETNS_UNIX_H__
> > >
> > > +struct unix_hashbucket;
> > >  struct ctl_table_header;
> > >  struct netns_unix {
> > > +       struct unix_hashbucket  *hash;
> > >         int                     sysctl_max_dgram_qlen;
> > >         struct ctl_table_header *ctl;
> > >  };
> > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > > index c0804ae9c96a..3c07702e2349 100644
> > > --- a/net/unix/af_unix.c
> > > +++ b/net/unix/af_unix.c
> > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
> > >
> > >  static int __net_init unix_net_init(struct net *net)
> > >  {
> > > -       int error = -ENOMEM;
> > > +       int i;
> > >
> > >         net->unx.sysctl_max_dgram_qlen = 10;
> > >         if (unix_sysctl_register(net))
> > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
> > >
> > >  #ifdef CONFIG_PROC_FS
> > >         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> > > -                       sizeof(struct seq_net_private))) {
> > > -               unix_sysctl_unregister(net);
> > > -               goto out;
> > > +                            sizeof(struct seq_net_private)))
> > > +               goto err_sysctl;
> > > +#endif
> > > +
> > > +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > +                               GFP_KERNEL);
> >
> > This will fail under memory pressure.
> >
> > Prefer kvmalloc_array()
>
> Thank you for feedback!
> I will use it in v2.
>
>
> > > +       if (!net->unx.hash)
> > > +               goto err_proc;
> > > +
> > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> > > +               spin_lock_init(&net->unx.hash[i].lock);
> > >         }
> > > +
> > > +       return 0;
> > > +
> > > +err_proc:
> > > +#ifdef CONFIG_PROC_FS
> > > +       remove_proc_entry("unix", net->proc_net);
> > >  #endif
> > > -       error = 0;
> > > +err_sysctl:
> > > +       unix_sysctl_unregister(net);
> > >  out:
> > > -       return error;
> > > +       return -ENOMEM;
> > >  }
> > >
> > >  static void __net_exit unix_net_exit(struct net *net)
> > >  {
> > > +       kfree(net->unx.hash);
> >
> > kvfree()
> >
> > >         unix_sysctl_unregister(net);
> > >         remove_proc_entry("unix", net->proc_net);
> > >  }
> > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
> > >
> > >         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
> > >
> > > +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > +                                   GFP_KERNEL);
> >
> > Why are you allocating the hash table twice ? It should be done
> > already in  unix_net_init() ?
>
> Ah sorry, just my mistake.
> I'll remove this alloc/free part.
>
>
> > > +       if (!init_net.unx.hash)
> > > +               goto out;
> > > +
> > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> > > +               spin_lock_init(&init_net.unx.hash[i].lock);
> > > +       }
> > > +
> > >         for (i = 0; i < UNIX_HASH_SIZE; i++)
> > >                 spin_lock_init(&unix_table_locks[i]);
> > >
> > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
> > >         proto_unregister(&unix_dgram_proto);
> > >         proto_unregister(&unix_stream_proto);
> > >         unregister_pernet_subsys(&unix_net_ops);
> > > +       kfree(init_net.unx.hash);
> >
> >    Not needed.
> >
> > >  }
> > >
> > >  /* Earlier than device_initcall() so that other drivers invoking
> > > --
> > > 2.30.2
> > >
>
>
> Best regards,
> Kuniyuki
Kuniyuki Iwashima June 17, 2022, 6:57 a.m. UTC | #4
From:   Eric Dumazet <edumazet@google.com>
Date:   Fri, 17 Jun 2022 08:08:32 +0200
> On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > From:   Eric Dumazet <edumazet@google.com>
> > Date:   Fri, 17 Jun 2022 06:23:37 +0200
> > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > > >
> > > > This commit adds a per netns hash table for AF_UNIX.
> > > >
> > > > Note that its size is fixed as UNIX_HASH_SIZE for now.
> > > >
> > >
> > > Note: Please include memory costs for this table, including when LOCKDEP is on.
> >
> > I'm sorry but I'm not quite sure.
> > Do you mean the table should have the size as member of its struct?
> > Could you elaborate on memory costs and LOCKDEP?
> 
> I am saying that instead of two separate arrays, you are now using one
> array, with holes in the structure
> 
> Without LOCKDEP, sizeof(spinlock_t) is 4.
> With LOCKDEP, sizeof(spinlock_t) is bigger.
> 
> So we are trading some costs of having two shared dense arrays, and
> having per-netns hash tables.
> 
> It would be nice to mention this trade off in the changelog, because
> some hosts have thousands of netns and few af_unix sockets :/

Thank you for explanation in detail!
I'm on the same page.
How about having separate arrays like this in per-netns struct?

struct unix_table {
       spinlock_t *locks;
       list_head  *buckets;
}


> > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > > ---
> > > >  include/net/af_unix.h    |  5 +++++
> > > >  include/net/netns/unix.h |  2 ++
> > > >  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
> > > >  3 files changed, 41 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > > > index acb56e463db1..0a17e49af0c9 100644
> > > > --- a/include/net/af_unix.h
> > > > +++ b/include/net/af_unix.h
> > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
> > > >  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
> > > >  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
> > > >
> > > > +struct unix_hashbucket {
> > > > +       spinlock_t              lock;
> > > > +       struct hlist_head       head;
> > > > +};
> > > > +
> > > >  struct unix_address {
> > > >         refcount_t      refcnt;
> > > >         int             len;
> > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> > > > index 91a3d7e39198..975c4e3f8a5b 100644
> > > > --- a/include/net/netns/unix.h
> > > > +++ b/include/net/netns/unix.h
> > > > @@ -5,8 +5,10 @@
> > > >  #ifndef __NETNS_UNIX_H__
> > > >  #define __NETNS_UNIX_H__
> > > >
> > > > +struct unix_hashbucket;
> > > >  struct ctl_table_header;
> > > >  struct netns_unix {
> > > > +       struct unix_hashbucket  *hash;
> > > >         int                     sysctl_max_dgram_qlen;
> > > >         struct ctl_table_header *ctl;
> > > >  };
> > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > > > index c0804ae9c96a..3c07702e2349 100644
> > > > --- a/net/unix/af_unix.c
> > > > +++ b/net/unix/af_unix.c
> > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
> > > >
> > > >  static int __net_init unix_net_init(struct net *net)
> > > >  {
> > > > -       int error = -ENOMEM;
> > > > +       int i;
> > > >
> > > >         net->unx.sysctl_max_dgram_qlen = 10;
> > > >         if (unix_sysctl_register(net))
> > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
> > > >
> > > >  #ifdef CONFIG_PROC_FS
> > > >         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> > > > -                       sizeof(struct seq_net_private))) {
> > > > -               unix_sysctl_unregister(net);
> > > > -               goto out;
> > > > +                            sizeof(struct seq_net_private)))
> > > > +               goto err_sysctl;
> > > > +#endif
> > > > +
> > > > +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > +                               GFP_KERNEL);
> > >
> > > This will fail under memory pressure.
> > >
> > > Prefer kvmalloc_array()
> >
> > Thank you for feedback!
> > I will use it in v2.
> >
> >
> > > > +       if (!net->unx.hash)
> > > > +               goto err_proc;
> > > > +
> > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> > > > +               spin_lock_init(&net->unx.hash[i].lock);
> > > >         }
> > > > +
> > > > +       return 0;
> > > > +
> > > > +err_proc:
> > > > +#ifdef CONFIG_PROC_FS
> > > > +       remove_proc_entry("unix", net->proc_net);
> > > >  #endif
> > > > -       error = 0;
> > > > +err_sysctl:
> > > > +       unix_sysctl_unregister(net);
> > > >  out:
> > > > -       return error;
> > > > +       return -ENOMEM;
> > > >  }
> > > >
> > > >  static void __net_exit unix_net_exit(struct net *net)
> > > >  {
> > > > +       kfree(net->unx.hash);
> > >
> > > kvfree()
> > >
> > > >         unix_sysctl_unregister(net);
> > > >         remove_proc_entry("unix", net->proc_net);
> > > >  }
> > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
> > > >
> > > >         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
> > > >
> > > > +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > +                                   GFP_KERNEL);
> > >
> > > Why are you allocating the hash table twice ? It should be done
> > > already in  unix_net_init() ?
> >
> > Ah sorry, just my mistake.
> > I'll remove this alloc/free part.
> >
> >
> > > > +       if (!init_net.unx.hash)
> > > > +               goto out;
> > > > +
> > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> > > > +               spin_lock_init(&init_net.unx.hash[i].lock);
> > > > +       }
> > > > +
> > > >         for (i = 0; i < UNIX_HASH_SIZE; i++)
> > > >                 spin_lock_init(&unix_table_locks[i]);
> > > >
> > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
> > > >         proto_unregister(&unix_dgram_proto);
> > > >         proto_unregister(&unix_stream_proto);
> > > >         unregister_pernet_subsys(&unix_net_ops);
> > > > +       kfree(init_net.unx.hash);
> > >
> > >    Not needed.
> > >
> > > >  }
> > > >
> > > >  /* Earlier than device_initcall() so that other drivers invoking
> > > > --
> > > > 2.30.2
> > > >
> >
> >
> > Best regards,
> > Kuniyuki
Eric Dumazet June 17, 2022, 8 a.m. UTC | #5
On Fri, Jun 17, 2022 at 8:57 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> From:   Eric Dumazet <edumazet@google.com>
> Date:   Fri, 17 Jun 2022 08:08:32 +0200
> > On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > >
> > > From:   Eric Dumazet <edumazet@google.com>
> > > Date:   Fri, 17 Jun 2022 06:23:37 +0200
> > > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > > > >
> > > > > This commit adds a per netns hash table for AF_UNIX.
> > > > >
> > > > > Note that its size is fixed as UNIX_HASH_SIZE for now.
> > > > >
> > > >
> > > > Note: Please include memory costs for this table, including when LOCKDEP is on.
> > >
> > > I'm sorry but I'm not quite sure.
> > > Do you mean the table should have the size as member of its struct?
> > > Could you elaborate on memory costs and LOCKDEP?
> >
> > I am saying that instead of two separate arrays, you are now using one
> > array, with holes in the structure
> >
> > Without LOCKDEP, sizeof(spinlock_t) is 4.
> > With LOCKDEP, sizeof(spinlock_t) is bigger.
> >
> > So we are trading some costs of having two shared dense arrays, and
> > having per-netns hash tables.
> >
> > It would be nice to mention this trade off in the changelog, because
> > some hosts have thousands of netns and few af_unix sockets :/
>
> Thank you for explanation in detail!
> I'm on the same page.
> How about having separate arrays like this in per-netns struct?
>
> struct unix_table {
>        spinlock_t *locks;
>        list_head  *buckets;
> }

Are we sure we need per-netns locks ?

I would think that sharing 256 spinlocks would be just fine, even on
hosts with more than 256 cpus.


>
>
> > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > > > ---
> > > > >  include/net/af_unix.h    |  5 +++++
> > > > >  include/net/netns/unix.h |  2 ++
> > > > >  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
> > > > >  3 files changed, 41 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > > > > index acb56e463db1..0a17e49af0c9 100644
> > > > > --- a/include/net/af_unix.h
> > > > > +++ b/include/net/af_unix.h
> > > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
> > > > >  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
> > > > >  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
> > > > >
> > > > > +struct unix_hashbucket {
> > > > > +       spinlock_t              lock;
> > > > > +       struct hlist_head       head;
> > > > > +};
> > > > > +
> > > > >  struct unix_address {
> > > > >         refcount_t      refcnt;
> > > > >         int             len;
> > > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> > > > > index 91a3d7e39198..975c4e3f8a5b 100644
> > > > > --- a/include/net/netns/unix.h
> > > > > +++ b/include/net/netns/unix.h
> > > > > @@ -5,8 +5,10 @@
> > > > >  #ifndef __NETNS_UNIX_H__
> > > > >  #define __NETNS_UNIX_H__
> > > > >
> > > > > +struct unix_hashbucket;
> > > > >  struct ctl_table_header;
> > > > >  struct netns_unix {
> > > > > +       struct unix_hashbucket  *hash;
> > > > >         int                     sysctl_max_dgram_qlen;
> > > > >         struct ctl_table_header *ctl;
> > > > >  };
> > > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > > > > index c0804ae9c96a..3c07702e2349 100644
> > > > > --- a/net/unix/af_unix.c
> > > > > +++ b/net/unix/af_unix.c
> > > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
> > > > >
> > > > >  static int __net_init unix_net_init(struct net *net)
> > > > >  {
> > > > > -       int error = -ENOMEM;
> > > > > +       int i;
> > > > >
> > > > >         net->unx.sysctl_max_dgram_qlen = 10;
> > > > >         if (unix_sysctl_register(net))
> > > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
> > > > >
> > > > >  #ifdef CONFIG_PROC_FS
> > > > >         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> > > > > -                       sizeof(struct seq_net_private))) {
> > > > > -               unix_sysctl_unregister(net);
> > > > > -               goto out;
> > > > > +                            sizeof(struct seq_net_private)))
> > > > > +               goto err_sysctl;
> > > > > +#endif
> > > > > +
> > > > > +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > > +                               GFP_KERNEL);
> > > >
> > > > This will fail under memory pressure.
> > > >
> > > > Prefer kvmalloc_array()
> > >
> > > Thank you for feedback!
> > > I will use it in v2.
> > >
> > >
> > > > > +       if (!net->unx.hash)
> > > > > +               goto err_proc;
> > > > > +
> > > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > > +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> > > > > +               spin_lock_init(&net->unx.hash[i].lock);
> > > > >         }
> > > > > +
> > > > > +       return 0;
> > > > > +
> > > > > +err_proc:
> > > > > +#ifdef CONFIG_PROC_FS
> > > > > +       remove_proc_entry("unix", net->proc_net);
> > > > >  #endif
> > > > > -       error = 0;
> > > > > +err_sysctl:
> > > > > +       unix_sysctl_unregister(net);
> > > > >  out:
> > > > > -       return error;
> > > > > +       return -ENOMEM;
> > > > >  }
> > > > >
> > > > >  static void __net_exit unix_net_exit(struct net *net)
> > > > >  {
> > > > > +       kfree(net->unx.hash);
> > > >
> > > > kvfree()
> > > >
> > > > >         unix_sysctl_unregister(net);
> > > > >         remove_proc_entry("unix", net->proc_net);
> > > > >  }
> > > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
> > > > >
> > > > >         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
> > > > >
> > > > > +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > > +                                   GFP_KERNEL);
> > > >
> > > > Why are you allocating the hash table twice ? It should be done
> > > > already in  unix_net_init() ?
> > >
> > > Ah sorry, just my mistake.
> > > I'll remove this alloc/free part.
> > >
> > >
> > > > > +       if (!init_net.unx.hash)
> > > > > +               goto out;
> > > > > +
> > > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > > +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> > > > > +               spin_lock_init(&init_net.unx.hash[i].lock);
> > > > > +       }
> > > > > +
> > > > >         for (i = 0; i < UNIX_HASH_SIZE; i++)
> > > > >                 spin_lock_init(&unix_table_locks[i]);
> > > > >
> > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
> > > > >         proto_unregister(&unix_dgram_proto);
> > > > >         proto_unregister(&unix_stream_proto);
> > > > >         unregister_pernet_subsys(&unix_net_ops);
> > > > > +       kfree(init_net.unx.hash);
> > > >
> > > >    Not needed.
> > > >
> > > > >  }
> > > > >
> > > > >  /* Earlier than device_initcall() so that other drivers invoking
> > > > > --
> > > > > 2.30.2
> > > > >
> > >
> > >
> > > Best regards,
> > > Kuniyuki
Kuniyuki Iwashima June 17, 2022, 5:52 p.m. UTC | #6
From:   Eric Dumazet <edumazet@google.com>
Date:   Fri, 17 Jun 2022 10:00:25 +0200
> On Fri, Jun 17, 2022 at 8:57 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > From:   Eric Dumazet <edumazet@google.com>
> > Date:   Fri, 17 Jun 2022 08:08:32 +0200
> > > On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > > >
> > > > From:   Eric Dumazet <edumazet@google.com>
> > > > Date:   Fri, 17 Jun 2022 06:23:37 +0200
> > > > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > > > > >
> > > > > > This commit adds a per netns hash table for AF_UNIX.
> > > > > >
> > > > > > Note that its size is fixed as UNIX_HASH_SIZE for now.
> > > > > >
> > > > >
> > > > > Note: Please include memory costs for this table, including when LOCKDEP is on.
> > > >
> > > > I'm sorry but I'm not quite sure.
> > > > Do you mean the table should have the size as member of its struct?
> > > > Could you elaborate on memory costs and LOCKDEP?
> > >
> > > I am saying that instead of two separate arrays, you are now using one
> > > array, with holes in the structure
> > >
> > > Without LOCKDEP, sizeof(spinlock_t) is 4.
> > > With LOCKDEP, sizeof(spinlock_t) is bigger.
> > >
> > > So we are trading some costs of having two shared dense arrays, and
> > > having per-netns hash tables.
> > >
> > > It would be nice to mention this trade off in the changelog, because
> > > some hosts have thousands of netns and few af_unix sockets :/
> >
> > Thank you for explanation in detail!
> > I'm on the same page.
> > How about having separate arrays like this in per-netns struct?
> >
> > struct unix_table {
> >        spinlock_t *locks;
> >        list_head  *buckets;
> > }
> 
> Are we sure we need per-netns locks ?
> 
> I would think that sharing 256 spinlocks would be just fine, even on
> hosts with more than 256 cpus.

I ran the test written on the last patch with three kernels 10 times
for each:

  1) global locks and hash table
     1m 38s ~ 1m 43s

  2) per-netns locks and hash tables (two dense arrays version)
     11s

  3) global locks and per-netns hash tables
     15s

As you thought, the length of list has larger impact than lock contention.
But on a host with 10 cpus per-netns, per-netns locks are faster than
shared one.

What do you think about this trade-off?


> > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > > > > ---
> > > > > >  include/net/af_unix.h    |  5 +++++
> > > > > >  include/net/netns/unix.h |  2 ++
> > > > > >  net/unix/af_unix.c       | 40 ++++++++++++++++++++++++++++++++++------
> > > > > >  3 files changed, 41 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> > > > > > index acb56e463db1..0a17e49af0c9 100644
> > > > > > --- a/include/net/af_unix.h
> > > > > > +++ b/include/net/af_unix.h
> > > > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight;
> > > > > >  extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
> > > > > >  extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
> > > > > >
> > > > > > +struct unix_hashbucket {
> > > > > > +       spinlock_t              lock;
> > > > > > +       struct hlist_head       head;
> > > > > > +};
> > > > > > +
> > > > > >  struct unix_address {
> > > > > >         refcount_t      refcnt;
> > > > > >         int             len;
> > > > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
> > > > > > index 91a3d7e39198..975c4e3f8a5b 100644
> > > > > > --- a/include/net/netns/unix.h
> > > > > > +++ b/include/net/netns/unix.h
> > > > > > @@ -5,8 +5,10 @@
> > > > > >  #ifndef __NETNS_UNIX_H__
> > > > > >  #define __NETNS_UNIX_H__
> > > > > >
> > > > > > +struct unix_hashbucket;
> > > > > >  struct ctl_table_header;
> > > > > >  struct netns_unix {
> > > > > > +       struct unix_hashbucket  *hash;
> > > > > >         int                     sysctl_max_dgram_qlen;
> > > > > >         struct ctl_table_header *ctl;
> > > > > >  };
> > > > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > > > > > index c0804ae9c96a..3c07702e2349 100644
> > > > > > --- a/net/unix/af_unix.c
> > > > > > +++ b/net/unix/af_unix.c
> > > > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {
> > > > > >
> > > > > >  static int __net_init unix_net_init(struct net *net)
> > > > > >  {
> > > > > > -       int error = -ENOMEM;
> > > > > > +       int i;
> > > > > >
> > > > > >         net->unx.sysctl_max_dgram_qlen = 10;
> > > > > >         if (unix_sysctl_register(net))
> > > > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net)
> > > > > >
> > > > > >  #ifdef CONFIG_PROC_FS
> > > > > >         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
> > > > > > -                       sizeof(struct seq_net_private))) {
> > > > > > -               unix_sysctl_unregister(net);
> > > > > > -               goto out;
> > > > > > +                            sizeof(struct seq_net_private)))
> > > > > > +               goto err_sysctl;
> > > > > > +#endif
> > > > > > +
> > > > > > +       net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > > > +                               GFP_KERNEL);
> > > > >
> > > > > This will fail under memory pressure.
> > > > >
> > > > > Prefer kvmalloc_array()
> > > >
> > > > Thank you for feedback!
> > > > I will use it in v2.
> > > >
> > > >
> > > > > > +       if (!net->unx.hash)
> > > > > > +               goto err_proc;
> > > > > > +
> > > > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > > > +               INIT_HLIST_HEAD(&net->unx.hash[i].head);
> > > > > > +               spin_lock_init(&net->unx.hash[i].lock);
> > > > > >         }
> > > > > > +
> > > > > > +       return 0;
> > > > > > +
> > > > > > +err_proc:
> > > > > > +#ifdef CONFIG_PROC_FS
> > > > > > +       remove_proc_entry("unix", net->proc_net);
> > > > > >  #endif
> > > > > > -       error = 0;
> > > > > > +err_sysctl:
> > > > > > +       unix_sysctl_unregister(net);
> > > > > >  out:
> > > > > > -       return error;
> > > > > > +       return -ENOMEM;
> > > > > >  }
> > > > > >
> > > > > >  static void __net_exit unix_net_exit(struct net *net)
> > > > > >  {
> > > > > > +       kfree(net->unx.hash);
> > > > >
> > > > > kvfree()
> > > > >
> > > > > >         unix_sysctl_unregister(net);
> > > > > >         remove_proc_entry("unix", net->proc_net);
> > > > > >  }
> > > > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void)
> > > > > >
> > > > > >         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
> > > > > >
> > > > > > +       init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
> > > > > > +                                   GFP_KERNEL);
> > > > >
> > > > > Why are you allocating the hash table twice ? It should be done
> > > > > already in  unix_net_init() ?
> > > >
> > > > Ah sorry, just my mistake.
> > > > I'll remove this alloc/free part.
> > > >
> > > >
> > > > > > +       if (!init_net.unx.hash)
> > > > > > +               goto out;
> > > > > > +
> > > > > > +       for (i = 0; i < UNIX_HASH_SIZE; i++) {
> > > > > > +               INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
> > > > > > +               spin_lock_init(&init_net.unx.hash[i].lock);
> > > > > > +       }
> > > > > > +
> > > > > >         for (i = 0; i < UNIX_HASH_SIZE; i++)
> > > > > >                 spin_lock_init(&unix_table_locks[i]);
> > > > > >
> > > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void)
> > > > > >         proto_unregister(&unix_dgram_proto);
> > > > > >         proto_unregister(&unix_stream_proto);
> > > > > >         unregister_pernet_subsys(&unix_net_ops);
> > > > > > +       kfree(init_net.unx.hash);
> > > > >
> > > > >    Not needed.
> > > > >
> > > > > >  }
> > > > > >
> > > > > >  /* Earlier than device_initcall() so that other drivers invoking
> > > > > > --
> > > > > > 2.30.2
> > > > > >
> > > >
> > > >
> > > > Best regards,
> > > > Kuniyuki
kernel test robot June 17, 2022, 6:17 p.m. UTC | #7
Hi Kuniyuki,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 5dcb50c009c9f8ec1cfca6a81a05c0060a5bbf68
config: hexagon-randconfig-r045-20220617 (https://download.01.org/0day-ci/archive/20220618/202206180244.SIWZxbAo-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d764aa7fc6b9cc3fbe960019018f5f9e941eb0a6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/d0da436e1c42550dbd332f48fd11992d5f4af487
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046
        git checkout d0da436e1c42550dbd332f48fd11992d5f4af487
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash net/unix/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/unix/af_unix.c:3590:1: warning: unused label 'err_sysctl' [-Wunused-label]
   err_sysctl:
   ^~~~~~~~~~~
   1 warning generated.


vim +/err_sysctl +3590 net/unix/af_unix.c

  3573	
  3574		net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
  3575					GFP_KERNEL);
  3576		if (!net->unx.hash)
  3577			goto err_proc;
  3578	
  3579		for (i = 0; i < UNIX_HASH_SIZE; i++) {
  3580			INIT_HLIST_HEAD(&net->unx.hash[i].head);
  3581			spin_lock_init(&net->unx.hash[i].lock);
  3582		}
  3583	
  3584		return 0;
  3585	
  3586	err_proc:
  3587	#ifdef CONFIG_PROC_FS
  3588		remove_proc_entry("unix", net->proc_net);
  3589	#endif
> 3590	err_sysctl:
  3591		unix_sysctl_unregister(net);
  3592	out:
  3593		return -ENOMEM;
  3594	}
  3595
Kuniyuki Iwashima June 17, 2022, 8:44 p.m. UTC | #8
From:   kernel test robot <lkp@intel.com>
Date:   Sat, 18 Jun 2022 02:17:44 +0800
> Hi Kuniyuki,
> 
> Thank you for the patch! Perhaps something to improve:
> 
> [auto build test WARNING on net-next/master]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 5dcb50c009c9f8ec1cfca6a81a05c0060a5bbf68
> config: hexagon-randconfig-r045-20220617 (https://download.01.org/0day-ci/archive/20220618/202206180244.SIWZxbAo-lkp@intel.com/config)
> compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d764aa7fc6b9cc3fbe960019018f5f9e941eb0a6)
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/intel-lab-lkp/linux/commit/d0da436e1c42550dbd332f48fd11992d5f4af487
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046
>         git checkout d0da436e1c42550dbd332f48fd11992d5f4af487
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash net/unix/
> 
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <lkp@intel.com>
> 
> All warnings (new ones prefixed by >>):
> 
> >> net/unix/af_unix.c:3590:1: warning: unused label 'err_sysctl' [-Wunused-label]
>    err_sysctl:
>    ^~~~~~~~~~~
>    1 warning generated.
> 
> 
> vim +/err_sysctl +3590 net/unix/af_unix.c
> 
>   3573	
>   3574		net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
>   3575					GFP_KERNEL);
>   3576		if (!net->unx.hash)
>   3577			goto err_proc;
>   3578	
>   3579		for (i = 0; i < UNIX_HASH_SIZE; i++) {
>   3580			INIT_HLIST_HEAD(&net->unx.hash[i].head);
>   3581			spin_lock_init(&net->unx.hash[i].lock);
>   3582		}
>   3583	
>   3584		return 0;
>   3585	
>   3586	err_proc:
>   3587	#ifdef CONFIG_PROC_FS
>   3588		remove_proc_entry("unix", net->proc_net);
>   3589	#endif
> > 3590	err_sysctl:

I will move this label into the #ifdef CONFIG_PROC_FS block above.

Thanks!


>   3591		unix_sysctl_unregister(net);
>   3592	out:
>   3593		return -ENOMEM;
>   3594	}
>   3595	
> 
> -- 
> 0-DAY CI Kernel Test Service
> https://01.org/lkp
diff mbox series

Patch

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index acb56e463db1..0a17e49af0c9 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -24,6 +24,11 @@  extern unsigned int unix_tot_inflight;
 extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
 extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
 
+struct unix_hashbucket {
+	spinlock_t		lock;
+	struct hlist_head	head;
+};
+
 struct unix_address {
 	refcount_t	refcnt;
 	int		len;
diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h
index 91a3d7e39198..975c4e3f8a5b 100644
--- a/include/net/netns/unix.h
+++ b/include/net/netns/unix.h
@@ -5,8 +5,10 @@ 
 #ifndef __NETNS_UNIX_H__
 #define __NETNS_UNIX_H__
 
+struct unix_hashbucket;
 struct ctl_table_header;
 struct netns_unix {
+	struct unix_hashbucket	*hash;
 	int			sysctl_max_dgram_qlen;
 	struct ctl_table_header	*ctl;
 };
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c0804ae9c96a..3c07702e2349 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3559,7 +3559,7 @@  static const struct net_proto_family unix_family_ops = {
 
 static int __net_init unix_net_init(struct net *net)
 {
-	int error = -ENOMEM;
+	int i;
 
 	net->unx.sysctl_max_dgram_qlen = 10;
 	if (unix_sysctl_register(net))
@@ -3567,18 +3567,35 @@  static int __net_init unix_net_init(struct net *net)
 
 #ifdef CONFIG_PROC_FS
 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
-			sizeof(struct seq_net_private))) {
-		unix_sysctl_unregister(net);
-		goto out;
+			     sizeof(struct seq_net_private)))
+		goto err_sysctl;
+#endif
+
+	net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
+				GFP_KERNEL);
+	if (!net->unx.hash)
+		goto err_proc;
+
+	for (i = 0; i < UNIX_HASH_SIZE; i++) {
+		INIT_HLIST_HEAD(&net->unx.hash[i].head);
+		spin_lock_init(&net->unx.hash[i].lock);
 	}
+
+	return 0;
+
+err_proc:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("unix", net->proc_net);
 #endif
-	error = 0;
+err_sysctl:
+	unix_sysctl_unregister(net);
 out:
-	return error;
+	return -ENOMEM;
 }
 
 static void __net_exit unix_net_exit(struct net *net)
 {
+	kfree(net->unx.hash);
 	unix_sysctl_unregister(net);
 	remove_proc_entry("unix", net->proc_net);
 }
@@ -3666,6 +3683,16 @@  static int __init af_unix_init(void)
 
 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
 
+	init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE,
+				    GFP_KERNEL);
+	if (!init_net.unx.hash)
+		goto out;
+
+	for (i = 0; i < UNIX_HASH_SIZE; i++) {
+		INIT_HLIST_HEAD(&init_net.unx.hash[i].head);
+		spin_lock_init(&init_net.unx.hash[i].lock);
+	}
+
 	for (i = 0; i < UNIX_HASH_SIZE; i++)
 		spin_lock_init(&unix_table_locks[i]);
 
@@ -3699,6 +3726,7 @@  static void __exit af_unix_exit(void)
 	proto_unregister(&unix_dgram_proto);
 	proto_unregister(&unix_stream_proto);
 	unregister_pernet_subsys(&unix_net_ops);
+	kfree(init_net.unx.hash);
 }
 
 /* Earlier than device_initcall() so that other drivers invoking