Message ID | 20220616234714.4291-4-kuniyu@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | af_unix: Introduce per-netns socket hash table. | expand |
On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > This commit adds a per netns hash table for AF_UNIX. > > Note that its size is fixed as UNIX_HASH_SIZE for now. > Note: Please include memory costs for this table, including when LOCKDEP is on. > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > --- > include/net/af_unix.h | 5 +++++ > include/net/netns/unix.h | 2 ++ > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > 3 files changed, 41 insertions(+), 6 deletions(-) > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > index acb56e463db1..0a17e49af0c9 100644 > --- a/include/net/af_unix.h > +++ b/include/net/af_unix.h > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > +struct unix_hashbucket { > + spinlock_t lock; > + struct hlist_head head; > +}; > + > struct unix_address { > refcount_t refcnt; > int len; > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > index 91a3d7e39198..975c4e3f8a5b 100644 > --- a/include/net/netns/unix.h > +++ b/include/net/netns/unix.h > @@ -5,8 +5,10 @@ > #ifndef __NETNS_UNIX_H__ > #define __NETNS_UNIX_H__ > > +struct unix_hashbucket; > struct ctl_table_header; > struct netns_unix { > + struct unix_hashbucket *hash; > int sysctl_max_dgram_qlen; > struct ctl_table_header *ctl; > }; > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index c0804ae9c96a..3c07702e2349 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > static int __net_init unix_net_init(struct net *net) > { > - int error = -ENOMEM; > + int i; > > net->unx.sysctl_max_dgram_qlen = 10; > if (unix_sysctl_register(net)) > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > #ifdef CONFIG_PROC_FS > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > - sizeof(struct seq_net_private))) { > - unix_sysctl_unregister(net); > - goto out; > + sizeof(struct seq_net_private))) > + goto err_sysctl; > +#endif > + > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > + GFP_KERNEL); This will fail under memory pressure. Prefer kvmalloc_array() > + if (!net->unx.hash) > + goto err_proc; > + > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > + spin_lock_init(&net->unx.hash[i].lock); > } > + > + return 0; > + > +err_proc: > +#ifdef CONFIG_PROC_FS > + remove_proc_entry("unix", net->proc_net); > #endif > - error = 0; > +err_sysctl: > + unix_sysctl_unregister(net); > out: > - return error; > + return -ENOMEM; > } > > static void __net_exit unix_net_exit(struct net *net) > { > + kfree(net->unx.hash); kvfree() > unix_sysctl_unregister(net); > remove_proc_entry("unix", net->proc_net); > } > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > + GFP_KERNEL); Why are you allocating the hash table twice ? It should be done already in unix_net_init() ? > + if (!init_net.unx.hash) > + goto out; > + > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > + spin_lock_init(&init_net.unx.hash[i].lock); > + } > + > for (i = 0; i < UNIX_HASH_SIZE; i++) > spin_lock_init(&unix_table_locks[i]); > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > proto_unregister(&unix_dgram_proto); > proto_unregister(&unix_stream_proto); > unregister_pernet_subsys(&unix_net_ops); > + kfree(init_net.unx.hash); Not needed. > } > > /* Earlier than device_initcall() so that other drivers invoking > -- > 2.30.2 >
From: Eric Dumazet <edumazet@google.com> Date: Fri, 17 Jun 2022 06:23:37 +0200 > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > This commit adds a per netns hash table for AF_UNIX. > > > > Note that its size is fixed as UNIX_HASH_SIZE for now. > > > > Note: Please include memory costs for this table, including when LOCKDEP is on. I'm sorry but I'm not quite sure. Do you mean the table should have the size as member of its struct? Could you elaborate on memory costs and LOCKDEP? > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > --- > > include/net/af_unix.h | 5 +++++ > > include/net/netns/unix.h | 2 ++ > > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > > 3 files changed, 41 insertions(+), 6 deletions(-) > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > > index acb56e463db1..0a17e49af0c9 100644 > > --- a/include/net/af_unix.h > > +++ b/include/net/af_unix.h > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > > > +struct unix_hashbucket { > > + spinlock_t lock; > > + struct hlist_head head; > > +}; > > + > > struct unix_address { > > refcount_t refcnt; > > int len; > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > > index 91a3d7e39198..975c4e3f8a5b 100644 > > --- a/include/net/netns/unix.h > > +++ b/include/net/netns/unix.h > > @@ -5,8 +5,10 @@ > > #ifndef __NETNS_UNIX_H__ > > #define __NETNS_UNIX_H__ > > > > +struct unix_hashbucket; > > struct ctl_table_header; > > struct netns_unix { > > + struct unix_hashbucket *hash; > > int sysctl_max_dgram_qlen; > > struct ctl_table_header *ctl; > > }; > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > > index c0804ae9c96a..3c07702e2349 100644 > > --- a/net/unix/af_unix.c > > +++ b/net/unix/af_unix.c > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > > > static int __net_init unix_net_init(struct net *net) > > { > > - int error = -ENOMEM; > > + int i; > > > > net->unx.sysctl_max_dgram_qlen = 10; > > if (unix_sysctl_register(net)) > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > > > #ifdef CONFIG_PROC_FS > > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > > - sizeof(struct seq_net_private))) { > > - unix_sysctl_unregister(net); > > - goto out; > > + sizeof(struct seq_net_private))) > > + goto err_sysctl; > > +#endif > > + > > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > + GFP_KERNEL); > > This will fail under memory pressure. > > Prefer kvmalloc_array() Thank you for feedback! I will use it in v2. > > + if (!net->unx.hash) > > + goto err_proc; > > + > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > > + spin_lock_init(&net->unx.hash[i].lock); > > } > > + > > + return 0; > > + > > +err_proc: > > +#ifdef CONFIG_PROC_FS > > + remove_proc_entry("unix", net->proc_net); > > #endif > > - error = 0; > > +err_sysctl: > > + unix_sysctl_unregister(net); > > out: > > - return error; > > + return -ENOMEM; > > } > > > > static void __net_exit unix_net_exit(struct net *net) > > { > > + kfree(net->unx.hash); > > kvfree() > > > unix_sysctl_unregister(net); > > remove_proc_entry("unix", net->proc_net); > > } > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > + GFP_KERNEL); > > Why are you allocating the hash table twice ? It should be done > already in unix_net_init() ? Ah sorry, just my mistake. I'll remove this alloc/free part. > > + if (!init_net.unx.hash) > > + goto out; > > + > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > > + spin_lock_init(&init_net.unx.hash[i].lock); > > + } > > + > > for (i = 0; i < UNIX_HASH_SIZE; i++) > > spin_lock_init(&unix_table_locks[i]); > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > > proto_unregister(&unix_dgram_proto); > > proto_unregister(&unix_stream_proto); > > unregister_pernet_subsys(&unix_net_ops); > > + kfree(init_net.unx.hash); > > Not needed. > > > } > > > > /* Earlier than device_initcall() so that other drivers invoking > > -- > > 2.30.2 > > Best regards, Kuniyuki
On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > From: Eric Dumazet <edumazet@google.com> > Date: Fri, 17 Jun 2022 06:23:37 +0200 > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > This commit adds a per netns hash table for AF_UNIX. > > > > > > Note that its size is fixed as UNIX_HASH_SIZE for now. > > > > > > > Note: Please include memory costs for this table, including when LOCKDEP is on. > > I'm sorry but I'm not quite sure. > Do you mean the table should have the size as member of its struct? > Could you elaborate on memory costs and LOCKDEP? I am saying that instead of two separate arrays, you are now using one array, with holes in the structure Without LOCKDEP, sizeof(spinlock_t) is 4. With LOCKDEP, sizeof(spinlock_t) is bigger. So we are trading some costs of having two shared dense arrays, and having per-netns hash tables. It would be nice to mention this trade off in the changelog, because some hosts have thousands of netns and few af_unix sockets :/ > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > --- > > > include/net/af_unix.h | 5 +++++ > > > include/net/netns/unix.h | 2 ++ > > > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > > > 3 files changed, 41 insertions(+), 6 deletions(-) > > > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > > > index acb56e463db1..0a17e49af0c9 100644 > > > --- a/include/net/af_unix.h > > > +++ b/include/net/af_unix.h > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > > > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > > > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > > > > > +struct unix_hashbucket { > > > + spinlock_t lock; > > > + struct hlist_head head; > > > +}; > > > + > > > struct unix_address { > > > refcount_t refcnt; > > > int len; > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > > > index 91a3d7e39198..975c4e3f8a5b 100644 > > > --- a/include/net/netns/unix.h > > > +++ b/include/net/netns/unix.h > > > @@ -5,8 +5,10 @@ > > > #ifndef __NETNS_UNIX_H__ > > > #define __NETNS_UNIX_H__ > > > > > > +struct unix_hashbucket; > > > struct ctl_table_header; > > > struct netns_unix { > > > + struct unix_hashbucket *hash; > > > int sysctl_max_dgram_qlen; > > > struct ctl_table_header *ctl; > > > }; > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > > > index c0804ae9c96a..3c07702e2349 100644 > > > --- a/net/unix/af_unix.c > > > +++ b/net/unix/af_unix.c > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > > > > > static int __net_init unix_net_init(struct net *net) > > > { > > > - int error = -ENOMEM; > > > + int i; > > > > > > net->unx.sysctl_max_dgram_qlen = 10; > > > if (unix_sysctl_register(net)) > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > > > > > #ifdef CONFIG_PROC_FS > > > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > > > - sizeof(struct seq_net_private))) { > > > - unix_sysctl_unregister(net); > > > - goto out; > > > + sizeof(struct seq_net_private))) > > > + goto err_sysctl; > > > +#endif > > > + > > > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > + GFP_KERNEL); > > > > This will fail under memory pressure. > > > > Prefer kvmalloc_array() > > Thank you for feedback! > I will use it in v2. > > > > > + if (!net->unx.hash) > > > + goto err_proc; > > > + > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > > > + spin_lock_init(&net->unx.hash[i].lock); > > > } > > > + > > > + return 0; > > > + > > > +err_proc: > > > +#ifdef CONFIG_PROC_FS > > > + remove_proc_entry("unix", net->proc_net); > > > #endif > > > - error = 0; > > > +err_sysctl: > > > + unix_sysctl_unregister(net); > > > out: > > > - return error; > > > + return -ENOMEM; > > > } > > > > > > static void __net_exit unix_net_exit(struct net *net) > > > { > > > + kfree(net->unx.hash); > > > > kvfree() > > > > > unix_sysctl_unregister(net); > > > remove_proc_entry("unix", net->proc_net); > > > } > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > > > > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > > > > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > + GFP_KERNEL); > > > > Why are you allocating the hash table twice ? It should be done > > already in unix_net_init() ? > > Ah sorry, just my mistake. > I'll remove this alloc/free part. > > > > > + if (!init_net.unx.hash) > > > + goto out; > > > + > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > > > + spin_lock_init(&init_net.unx.hash[i].lock); > > > + } > > > + > > > for (i = 0; i < UNIX_HASH_SIZE; i++) > > > spin_lock_init(&unix_table_locks[i]); > > > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > > > proto_unregister(&unix_dgram_proto); > > > proto_unregister(&unix_stream_proto); > > > unregister_pernet_subsys(&unix_net_ops); > > > + kfree(init_net.unx.hash); > > > > Not needed. > > > > > } > > > > > > /* Earlier than device_initcall() so that other drivers invoking > > > -- > > > 2.30.2 > > > > > > Best regards, > Kuniyuki
From: Eric Dumazet <edumazet@google.com> Date: Fri, 17 Jun 2022 08:08:32 +0200 > On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > From: Eric Dumazet <edumazet@google.com> > > Date: Fri, 17 Jun 2022 06:23:37 +0200 > > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > > > This commit adds a per netns hash table for AF_UNIX. > > > > > > > > Note that its size is fixed as UNIX_HASH_SIZE for now. > > > > > > > > > > Note: Please include memory costs for this table, including when LOCKDEP is on. > > > > I'm sorry but I'm not quite sure. > > Do you mean the table should have the size as member of its struct? > > Could you elaborate on memory costs and LOCKDEP? > > I am saying that instead of two separate arrays, you are now using one > array, with holes in the structure > > Without LOCKDEP, sizeof(spinlock_t) is 4. > With LOCKDEP, sizeof(spinlock_t) is bigger. > > So we are trading some costs of having two shared dense arrays, and > having per-netns hash tables. > > It would be nice to mention this trade off in the changelog, because > some hosts have thousands of netns and few af_unix sockets :/ Thank you for explanation in detail! I'm on the same page. How about having separate arrays like this in per-netns struct? struct unix_table { spinlock_t *locks; list_head *buckets; } > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > --- > > > > include/net/af_unix.h | 5 +++++ > > > > include/net/netns/unix.h | 2 ++ > > > > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > > > > 3 files changed, 41 insertions(+), 6 deletions(-) > > > > > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > > > > index acb56e463db1..0a17e49af0c9 100644 > > > > --- a/include/net/af_unix.h > > > > +++ b/include/net/af_unix.h > > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > > > > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > > > > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > > > > > > > +struct unix_hashbucket { > > > > + spinlock_t lock; > > > > + struct hlist_head head; > > > > +}; > > > > + > > > > struct unix_address { > > > > refcount_t refcnt; > > > > int len; > > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > > > > index 91a3d7e39198..975c4e3f8a5b 100644 > > > > --- a/include/net/netns/unix.h > > > > +++ b/include/net/netns/unix.h > > > > @@ -5,8 +5,10 @@ > > > > #ifndef __NETNS_UNIX_H__ > > > > #define __NETNS_UNIX_H__ > > > > > > > > +struct unix_hashbucket; > > > > struct ctl_table_header; > > > > struct netns_unix { > > > > + struct unix_hashbucket *hash; > > > > int sysctl_max_dgram_qlen; > > > > struct ctl_table_header *ctl; > > > > }; > > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > > > > index c0804ae9c96a..3c07702e2349 100644 > > > > --- a/net/unix/af_unix.c > > > > +++ b/net/unix/af_unix.c > > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > > > > > > > static int __net_init unix_net_init(struct net *net) > > > > { > > > > - int error = -ENOMEM; > > > > + int i; > > > > > > > > net->unx.sysctl_max_dgram_qlen = 10; > > > > if (unix_sysctl_register(net)) > > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > > > > > > > #ifdef CONFIG_PROC_FS > > > > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > > > > - sizeof(struct seq_net_private))) { > > > > - unix_sysctl_unregister(net); > > > > - goto out; > > > > + sizeof(struct seq_net_private))) > > > > + goto err_sysctl; > > > > +#endif > > > > + > > > > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > + GFP_KERNEL); > > > > > > This will fail under memory pressure. > > > > > > Prefer kvmalloc_array() > > > > Thank you for feedback! > > I will use it in v2. > > > > > > > > + if (!net->unx.hash) > > > > + goto err_proc; > > > > + > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > > > > + spin_lock_init(&net->unx.hash[i].lock); > > > > } > > > > + > > > > + return 0; > > > > + > > > > +err_proc: > > > > +#ifdef CONFIG_PROC_FS > > > > + remove_proc_entry("unix", net->proc_net); > > > > #endif > > > > - error = 0; > > > > +err_sysctl: > > > > + unix_sysctl_unregister(net); > > > > out: > > > > - return error; > > > > + return -ENOMEM; > > > > } > > > > > > > > static void __net_exit unix_net_exit(struct net *net) > > > > { > > > > + kfree(net->unx.hash); > > > > > > kvfree() > > > > > > > unix_sysctl_unregister(net); > > > > remove_proc_entry("unix", net->proc_net); > > > > } > > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > > > > > > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > > > > > > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > + GFP_KERNEL); > > > > > > Why are you allocating the hash table twice ? It should be done > > > already in unix_net_init() ? > > > > Ah sorry, just my mistake. > > I'll remove this alloc/free part. > > > > > > > > + if (!init_net.unx.hash) > > > > + goto out; > > > > + > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > > > > + spin_lock_init(&init_net.unx.hash[i].lock); > > > > + } > > > > + > > > > for (i = 0; i < UNIX_HASH_SIZE; i++) > > > > spin_lock_init(&unix_table_locks[i]); > > > > > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > > > > proto_unregister(&unix_dgram_proto); > > > > proto_unregister(&unix_stream_proto); > > > > unregister_pernet_subsys(&unix_net_ops); > > > > + kfree(init_net.unx.hash); > > > > > > Not needed. > > > > > > > } > > > > > > > > /* Earlier than device_initcall() so that other drivers invoking > > > > -- > > > > 2.30.2 > > > > > > > > > > Best regards, > > Kuniyuki
On Fri, Jun 17, 2022 at 8:57 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > From: Eric Dumazet <edumazet@google.com> > Date: Fri, 17 Jun 2022 08:08:32 +0200 > > On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > From: Eric Dumazet <edumazet@google.com> > > > Date: Fri, 17 Jun 2022 06:23:37 +0200 > > > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > > > > > This commit adds a per netns hash table for AF_UNIX. > > > > > > > > > > Note that its size is fixed as UNIX_HASH_SIZE for now. > > > > > > > > > > > > > Note: Please include memory costs for this table, including when LOCKDEP is on. > > > > > > I'm sorry but I'm not quite sure. > > > Do you mean the table should have the size as member of its struct? > > > Could you elaborate on memory costs and LOCKDEP? > > > > I am saying that instead of two separate arrays, you are now using one > > array, with holes in the structure > > > > Without LOCKDEP, sizeof(spinlock_t) is 4. > > With LOCKDEP, sizeof(spinlock_t) is bigger. > > > > So we are trading some costs of having two shared dense arrays, and > > having per-netns hash tables. > > > > It would be nice to mention this trade off in the changelog, because > > some hosts have thousands of netns and few af_unix sockets :/ > > Thank you for explanation in detail! > I'm on the same page. > How about having separate arrays like this in per-netns struct? > > struct unix_table { > spinlock_t *locks; > list_head *buckets; > } Are we sure we need per-netns locks ? I would think that sharing 256 spinlocks would be just fine, even on hosts with more than 256 cpus. > > > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > > --- > > > > > include/net/af_unix.h | 5 +++++ > > > > > include/net/netns/unix.h | 2 ++ > > > > > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > > > > > 3 files changed, 41 insertions(+), 6 deletions(-) > > > > > > > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > > > > > index acb56e463db1..0a17e49af0c9 100644 > > > > > --- a/include/net/af_unix.h > > > > > +++ b/include/net/af_unix.h > > > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > > > > > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > > > > > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > > > > > > > > > +struct unix_hashbucket { > > > > > + spinlock_t lock; > > > > > + struct hlist_head head; > > > > > +}; > > > > > + > > > > > struct unix_address { > > > > > refcount_t refcnt; > > > > > int len; > > > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > > > > > index 91a3d7e39198..975c4e3f8a5b 100644 > > > > > --- a/include/net/netns/unix.h > > > > > +++ b/include/net/netns/unix.h > > > > > @@ -5,8 +5,10 @@ > > > > > #ifndef __NETNS_UNIX_H__ > > > > > #define __NETNS_UNIX_H__ > > > > > > > > > > +struct unix_hashbucket; > > > > > struct ctl_table_header; > > > > > struct netns_unix { > > > > > + struct unix_hashbucket *hash; > > > > > int sysctl_max_dgram_qlen; > > > > > struct ctl_table_header *ctl; > > > > > }; > > > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > > > > > index c0804ae9c96a..3c07702e2349 100644 > > > > > --- a/net/unix/af_unix.c > > > > > +++ b/net/unix/af_unix.c > > > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > > > > > > > > > static int __net_init unix_net_init(struct net *net) > > > > > { > > > > > - int error = -ENOMEM; > > > > > + int i; > > > > > > > > > > net->unx.sysctl_max_dgram_qlen = 10; > > > > > if (unix_sysctl_register(net)) > > > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > > > > > > > > > #ifdef CONFIG_PROC_FS > > > > > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > > > > > - sizeof(struct seq_net_private))) { > > > > > - unix_sysctl_unregister(net); > > > > > - goto out; > > > > > + sizeof(struct seq_net_private))) > > > > > + goto err_sysctl; > > > > > +#endif > > > > > + > > > > > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > > + GFP_KERNEL); > > > > > > > > This will fail under memory pressure. > > > > > > > > Prefer kvmalloc_array() > > > > > > Thank you for feedback! > > > I will use it in v2. > > > > > > > > > > > + if (!net->unx.hash) > > > > > + goto err_proc; > > > > > + > > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > > > > > + spin_lock_init(&net->unx.hash[i].lock); > > > > > } > > > > > + > > > > > + return 0; > > > > > + > > > > > +err_proc: > > > > > +#ifdef CONFIG_PROC_FS > > > > > + remove_proc_entry("unix", net->proc_net); > > > > > #endif > > > > > - error = 0; > > > > > +err_sysctl: > > > > > + unix_sysctl_unregister(net); > > > > > out: > > > > > - return error; > > > > > + return -ENOMEM; > > > > > } > > > > > > > > > > static void __net_exit unix_net_exit(struct net *net) > > > > > { > > > > > + kfree(net->unx.hash); > > > > > > > > kvfree() > > > > > > > > > unix_sysctl_unregister(net); > > > > > remove_proc_entry("unix", net->proc_net); > > > > > } > > > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > > > > > > > > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > > > > > > > > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > > + GFP_KERNEL); > > > > > > > > Why are you allocating the hash table twice ? It should be done > > > > already in unix_net_init() ? > > > > > > Ah sorry, just my mistake. > > > I'll remove this alloc/free part. > > > > > > > > > > > + if (!init_net.unx.hash) > > > > > + goto out; > > > > > + > > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > > > > > + spin_lock_init(&init_net.unx.hash[i].lock); > > > > > + } > > > > > + > > > > > for (i = 0; i < UNIX_HASH_SIZE; i++) > > > > > spin_lock_init(&unix_table_locks[i]); > > > > > > > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > > > > > proto_unregister(&unix_dgram_proto); > > > > > proto_unregister(&unix_stream_proto); > > > > > unregister_pernet_subsys(&unix_net_ops); > > > > > + kfree(init_net.unx.hash); > > > > > > > > Not needed. > > > > > > > > > } > > > > > > > > > > /* Earlier than device_initcall() so that other drivers invoking > > > > > -- > > > > > 2.30.2 > > > > > > > > > > > > > > Best regards, > > > Kuniyuki
From: Eric Dumazet <edumazet@google.com> Date: Fri, 17 Jun 2022 10:00:25 +0200 > On Fri, Jun 17, 2022 at 8:57 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > From: Eric Dumazet <edumazet@google.com> > > Date: Fri, 17 Jun 2022 08:08:32 +0200 > > > On Fri, Jun 17, 2022 at 7:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > > > From: Eric Dumazet <edumazet@google.com> > > > > Date: Fri, 17 Jun 2022 06:23:37 +0200 > > > > > On Fri, Jun 17, 2022 at 1:48 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > > > > > > > This commit adds a per netns hash table for AF_UNIX. > > > > > > > > > > > > Note that its size is fixed as UNIX_HASH_SIZE for now. > > > > > > > > > > > > > > > > Note: Please include memory costs for this table, including when LOCKDEP is on. > > > > > > > > I'm sorry but I'm not quite sure. > > > > Do you mean the table should have the size as member of its struct? > > > > Could you elaborate on memory costs and LOCKDEP? > > > > > > I am saying that instead of two separate arrays, you are now using one > > > array, with holes in the structure > > > > > > Without LOCKDEP, sizeof(spinlock_t) is 4. > > > With LOCKDEP, sizeof(spinlock_t) is bigger. > > > > > > So we are trading some costs of having two shared dense arrays, and > > > having per-netns hash tables. > > > > > > It would be nice to mention this trade off in the changelog, because > > > some hosts have thousands of netns and few af_unix sockets :/ > > > > Thank you for explanation in detail! > > I'm on the same page. > > How about having separate arrays like this in per-netns struct? > > > > struct unix_table { > > spinlock_t *locks; > > list_head *buckets; > > } > > Are we sure we need per-netns locks ? > > I would think that sharing 256 spinlocks would be just fine, even on > hosts with more than 256 cpus. I ran the test written on the last patch with three kernels 10 times for each: 1) global locks and hash table 1m 38s ~ 1m 43s 2) per-netns locks and hash tables (two dense arrays version) 11s 3) global locks and per-netns hash tables 15s As you thought, the length of list has larger impact than lock contention. But on a host with 10 cpus per-netns, per-netns locks are faster than shared one. What do you think about this trade-off? > > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > > > --- > > > > > > include/net/af_unix.h | 5 +++++ > > > > > > include/net/netns/unix.h | 2 ++ > > > > > > net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ > > > > > > 3 files changed, 41 insertions(+), 6 deletions(-) > > > > > > > > > > > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > > > > > > index acb56e463db1..0a17e49af0c9 100644 > > > > > > --- a/include/net/af_unix.h > > > > > > +++ b/include/net/af_unix.h > > > > > > @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; > > > > > > extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; > > > > > > extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; > > > > > > > > > > > > +struct unix_hashbucket { > > > > > > + spinlock_t lock; > > > > > > + struct hlist_head head; > > > > > > +}; > > > > > > + > > > > > > struct unix_address { > > > > > > refcount_t refcnt; > > > > > > int len; > > > > > > diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h > > > > > > index 91a3d7e39198..975c4e3f8a5b 100644 > > > > > > --- a/include/net/netns/unix.h > > > > > > +++ b/include/net/netns/unix.h > > > > > > @@ -5,8 +5,10 @@ > > > > > > #ifndef __NETNS_UNIX_H__ > > > > > > #define __NETNS_UNIX_H__ > > > > > > > > > > > > +struct unix_hashbucket; > > > > > > struct ctl_table_header; > > > > > > struct netns_unix { > > > > > > + struct unix_hashbucket *hash; > > > > > > int sysctl_max_dgram_qlen; > > > > > > struct ctl_table_header *ctl; > > > > > > }; > > > > > > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > > > > > > index c0804ae9c96a..3c07702e2349 100644 > > > > > > --- a/net/unix/af_unix.c > > > > > > +++ b/net/unix/af_unix.c > > > > > > @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { > > > > > > > > > > > > static int __net_init unix_net_init(struct net *net) > > > > > > { > > > > > > - int error = -ENOMEM; > > > > > > + int i; > > > > > > > > > > > > net->unx.sysctl_max_dgram_qlen = 10; > > > > > > if (unix_sysctl_register(net)) > > > > > > @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) > > > > > > > > > > > > #ifdef CONFIG_PROC_FS > > > > > > if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, > > > > > > - sizeof(struct seq_net_private))) { > > > > > > - unix_sysctl_unregister(net); > > > > > > - goto out; > > > > > > + sizeof(struct seq_net_private))) > > > > > > + goto err_sysctl; > > > > > > +#endif > > > > > > + > > > > > > + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > > > + GFP_KERNEL); > > > > > > > > > > This will fail under memory pressure. > > > > > > > > > > Prefer kvmalloc_array() > > > > > > > > Thank you for feedback! > > > > I will use it in v2. > > > > > > > > > > > > > > + if (!net->unx.hash) > > > > > > + goto err_proc; > > > > > > + > > > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > > > + INIT_HLIST_HEAD(&net->unx.hash[i].head); > > > > > > + spin_lock_init(&net->unx.hash[i].lock); > > > > > > } > > > > > > + > > > > > > + return 0; > > > > > > + > > > > > > +err_proc: > > > > > > +#ifdef CONFIG_PROC_FS > > > > > > + remove_proc_entry("unix", net->proc_net); > > > > > > #endif > > > > > > - error = 0; > > > > > > +err_sysctl: > > > > > > + unix_sysctl_unregister(net); > > > > > > out: > > > > > > - return error; > > > > > > + return -ENOMEM; > > > > > > } > > > > > > > > > > > > static void __net_exit unix_net_exit(struct net *net) > > > > > > { > > > > > > + kfree(net->unx.hash); > > > > > > > > > > kvfree() > > > > > > > > > > > unix_sysctl_unregister(net); > > > > > > remove_proc_entry("unix", net->proc_net); > > > > > > } > > > > > > @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) > > > > > > > > > > > > BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); > > > > > > > > > > > > + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > > > > > > + GFP_KERNEL); > > > > > > > > > > Why are you allocating the hash table twice ? It should be done > > > > > already in unix_net_init() ? > > > > > > > > Ah sorry, just my mistake. > > > > I'll remove this alloc/free part. > > > > > > > > > > > > > > + if (!init_net.unx.hash) > > > > > > + goto out; > > > > > > + > > > > > > + for (i = 0; i < UNIX_HASH_SIZE; i++) { > > > > > > + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); > > > > > > + spin_lock_init(&init_net.unx.hash[i].lock); > > > > > > + } > > > > > > + > > > > > > for (i = 0; i < UNIX_HASH_SIZE; i++) > > > > > > spin_lock_init(&unix_table_locks[i]); > > > > > > > > > > > > @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) > > > > > > proto_unregister(&unix_dgram_proto); > > > > > > proto_unregister(&unix_stream_proto); > > > > > > unregister_pernet_subsys(&unix_net_ops); > > > > > > + kfree(init_net.unx.hash); > > > > > > > > > > Not needed. > > > > > > > > > > > } > > > > > > > > > > > > /* Earlier than device_initcall() so that other drivers invoking > > > > > > -- > > > > > > 2.30.2 > > > > > > > > > > > > > > > > > > Best regards, > > > > Kuniyuki
Hi Kuniyuki, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on net-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046 base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 5dcb50c009c9f8ec1cfca6a81a05c0060a5bbf68 config: hexagon-randconfig-r045-20220617 (https://download.01.org/0day-ci/archive/20220618/202206180244.SIWZxbAo-lkp@intel.com/config) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d764aa7fc6b9cc3fbe960019018f5f9e941eb0a6) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/d0da436e1c42550dbd332f48fd11992d5f4af487 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046 git checkout d0da436e1c42550dbd332f48fd11992d5f4af487 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash net/unix/ If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> net/unix/af_unix.c:3590:1: warning: unused label 'err_sysctl' [-Wunused-label] err_sysctl: ^~~~~~~~~~~ 1 warning generated. vim +/err_sysctl +3590 net/unix/af_unix.c 3573 3574 net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, 3575 GFP_KERNEL); 3576 if (!net->unx.hash) 3577 goto err_proc; 3578 3579 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3580 INIT_HLIST_HEAD(&net->unx.hash[i].head); 3581 spin_lock_init(&net->unx.hash[i].lock); 3582 } 3583 3584 return 0; 3585 3586 err_proc: 3587 #ifdef CONFIG_PROC_FS 3588 remove_proc_entry("unix", net->proc_net); 3589 #endif > 3590 err_sysctl: 3591 unix_sysctl_unregister(net); 3592 out: 3593 return -ENOMEM; 3594 } 3595
From: kernel test robot <lkp@intel.com> Date: Sat, 18 Jun 2022 02:17:44 +0800 > Hi Kuniyuki, > > Thank you for the patch! Perhaps something to improve: > > [auto build test WARNING on net-next/master] > > url: https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046 > base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 5dcb50c009c9f8ec1cfca6a81a05c0060a5bbf68 > config: hexagon-randconfig-r045-20220617 (https://download.01.org/0day-ci/archive/20220618/202206180244.SIWZxbAo-lkp@intel.com/config) > compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d764aa7fc6b9cc3fbe960019018f5f9e941eb0a6) > reproduce (this is a W=1 build): > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > chmod +x ~/bin/make.cross > # https://github.com/intel-lab-lkp/linux/commit/d0da436e1c42550dbd332f48fd11992d5f4af487 > git remote add linux-review https://github.com/intel-lab-lkp/linux > git fetch --no-tags linux-review Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220617-075046 > git checkout d0da436e1c42550dbd332f48fd11992d5f4af487 > # save the config file > mkdir build_dir && cp config build_dir/.config > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash net/unix/ > > If you fix the issue, kindly add following tag where applicable > Reported-by: kernel test robot <lkp@intel.com> > > All warnings (new ones prefixed by >>): > > >> net/unix/af_unix.c:3590:1: warning: unused label 'err_sysctl' [-Wunused-label] > err_sysctl: > ^~~~~~~~~~~ > 1 warning generated. > > > vim +/err_sysctl +3590 net/unix/af_unix.c > > 3573 > 3574 net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, > 3575 GFP_KERNEL); > 3576 if (!net->unx.hash) > 3577 goto err_proc; > 3578 > 3579 for (i = 0; i < UNIX_HASH_SIZE; i++) { > 3580 INIT_HLIST_HEAD(&net->unx.hash[i].head); > 3581 spin_lock_init(&net->unx.hash[i].lock); > 3582 } > 3583 > 3584 return 0; > 3585 > 3586 err_proc: > 3587 #ifdef CONFIG_PROC_FS > 3588 remove_proc_entry("unix", net->proc_net); > 3589 #endif > > 3590 err_sysctl: I will move this label into the #ifdef CONFIG_PROC_FS block above. Thanks! > 3591 unix_sysctl_unregister(net); > 3592 out: > 3593 return -ENOMEM; > 3594 } > 3595 > > -- > 0-DAY CI Kernel Test Service > https://01.org/lkp
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index acb56e463db1..0a17e49af0c9 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -24,6 +24,11 @@ extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_locks[UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE]; +struct unix_hashbucket { + spinlock_t lock; + struct hlist_head head; +}; + struct unix_address { refcount_t refcnt; int len; diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 91a3d7e39198..975c4e3f8a5b 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,8 +5,10 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +struct unix_hashbucket; struct ctl_table_header; struct netns_unix { + struct unix_hashbucket *hash; int sysctl_max_dgram_qlen; struct ctl_table_header *ctl; }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c0804ae9c96a..3c07702e2349 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { static int __net_init unix_net_init(struct net *net) { - int error = -ENOMEM; + int i; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) @@ -3567,18 +3567,35 @@ static int __net_init unix_net_init(struct net *net) #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, - sizeof(struct seq_net_private))) { - unix_sysctl_unregister(net); - goto out; + sizeof(struct seq_net_private))) + goto err_sysctl; +#endif + + net->unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, + GFP_KERNEL); + if (!net->unx.hash) + goto err_proc; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&net->unx.hash[i].head); + spin_lock_init(&net->unx.hash[i].lock); } + + return 0; + +err_proc: +#ifdef CONFIG_PROC_FS + remove_proc_entry("unix", net->proc_net); #endif - error = 0; +err_sysctl: + unix_sysctl_unregister(net); out: - return error; + return -ENOMEM; } static void __net_exit unix_net_exit(struct net *net) { + kfree(net->unx.hash); unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } @@ -3666,6 +3683,16 @@ static int __init af_unix_init(void) BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); + init_net.unx.hash = kmalloc(sizeof(struct unix_hashbucket) * UNIX_HASH_SIZE, + GFP_KERNEL); + if (!init_net.unx.hash) + goto out; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&init_net.unx.hash[i].head); + spin_lock_init(&init_net.unx.hash[i].lock); + } + for (i = 0; i < UNIX_HASH_SIZE; i++) spin_lock_init(&unix_table_locks[i]); @@ -3699,6 +3726,7 @@ static void __exit af_unix_exit(void) proto_unregister(&unix_dgram_proto); proto_unregister(&unix_stream_proto); unregister_pernet_subsys(&unix_net_ops); + kfree(init_net.unx.hash); } /* Earlier than device_initcall() so that other drivers invoking
This commit adds a per netns hash table for AF_UNIX. Note that its size is fixed as UNIX_HASH_SIZE for now. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> --- include/net/af_unix.h | 5 +++++ include/net/netns/unix.h | 2 ++ net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 6 deletions(-)