diff mbox series

[v3,net-next,5/6] af_unix: Put a socket into a per-netns hash table.

Message ID 20220621171913.73401-6-kuniyu@amazon.com (mailing list archive)
State Accepted
Commit cf2f225e2653734e66e91c09e1cbe004bfd3d4a7
Delegated to: Netdev Maintainers
Headers show
Series af_unix: Introduce per-netns socket hash table. | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 58 this patch: 58
netdev/cc_maintainers warning 1 maintainers not CCed: viro@zeniv.linux.org.uk
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 59 this patch: 59
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 183 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kuniyuki Iwashima June 21, 2022, 5:19 p.m. UTC
This commit replaces the global hash table with a per-netns one and removes
the global one.

We now link a socket in each netns's hash table so we can save some netns
comparisons when iterating through a hash bucket.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/af_unix.h |  1 -
 net/unix/af_unix.c    | 50 +++++++++++++++++--------------------------
 net/unix/diag.c       |  9 +++-----
 3 files changed, 23 insertions(+), 37 deletions(-)

Comments

kernel test robot July 3, 2022, 2:06 p.m. UTC | #1
Greetings,

FYI, we noticed the following commit (built with gcc-11):

commit: d66d39e4713c1c9e70965e2375b780ab9522dfb3 ("[PATCH v3 net-next 5/6] af_unix: Put a socket into a per-netns hash table.")
url: https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Introduce-per-netns-socket-hash-table/20220622-012539
base: https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git 8720bd951b8e8515ffd995c7631790fdabaa9265
patch link: https://lore.kernel.org/netdev/20220621171913.73401-6-kuniyu@amazon.com

in testcase: ltp
version: ltp-x86_64-14c1f76-1_20220625
with following parameters:

	test: net.rpc_tests
	ucode: 0x21

test-description: The LTP testsuite contains a collection of tools for testing the Linux kernel and related features.
test-url: http://linux-test-project.github.io/


on test machine: 4 threads 1 sockets Intel(R) Core(TM) i3-3220 CPU @ 3.30GHz with 8G memory

caused the changes below (please refer to the attached dmesg/kmsg for the entire log/backtrace):


Please note that we also observed three other tests that fail on this commit
but pass on its parent:

39d14b6772921fa2 d66d39e4713c1c9e70965e2375b
---------------- ---------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
           :6          100%           6:6     ltp.rpc_pmap_set.fail
           :6          100%           6:6     ltp.rpc_pmap_unset.fail
           :6          100%           6:6     ltp.rpc_registerrpc.fail
           :6          100%           6:6     ltp.rpc_svc_register.fail


If you fix the issue, kindly add the following tag:
Reported-by: kernel test robot <oliver.sang@intel.com>



<<<test_start>>>
tag=rpc_pmap_set stime=1656693410
cmdline="rpc_test.sh -c rpc_pmap_set"
contacts=""
analysis=exit
<<<test_output>>>
rpc_test 1 TINFO: initialize 'lhost' 'ltp_ns_veth2' interface
rpc_test 1 TINFO: add local addr 10.0.0.2/24
rpc_test 1 TINFO: add local addr fd00:1:1:1::2/64
rpc_test 1 TINFO: initialize 'rhost' 'ltp_ns_veth1' interface
rpc_test 1 TINFO: add remote addr 10.0.0.1/24
rpc_test 1 TINFO: add remote addr fd00:1:1:1::1/64
rpc_test 1 TINFO: Network config (local -- remote):
rpc_test 1 TINFO: ltp_ns_veth2 -- ltp_ns_veth1
rpc_test 1 TINFO: 10.0.0.2/24 -- 10.0.0.1/24
rpc_test 1 TINFO: fd00:1:1:1::2/64 -- fd00:1:1:1::1/64
rpc_test 1 TINFO: timeout per run is 0h 5m 0s
rpc_test 1 TINFO: check registered RPC with rpcinfo
rpc_test 1 TINFO: registered RPC:
   program vers proto   port  service
    100000    4   tcp    111  portmapper
    100000    3   tcp    111  portmapper
    100000    2   tcp    111  portmapper
    100000    4   udp    111  portmapper
    100000    3   udp    111  portmapper
    100000    2   udp    111  portmapper
    100001    1   udp  44050  rstatd
    100001    2   udp  44050  rstatd
    100001    3   udp  44050  rstatd
    100001    4   udp  44050  rstatd
    100001    5   udp  44050  rstatd
    100002    2   udp  35568  rusersd
    100002    3   udp  35568  rusersd
   2000333   10   udp  51134
   2000333   10   tcp  63711
rpc_test 1 TINFO: using libtirpc: yes
rpc_test 1 TFAIL: rpc_pmap_set 10.0.0.2 536875000 failed unexpectedly
1

Summary:
passed   0
failed   1
broken   0
skipped  0
warnings 0
<<<execution_status>>>
initiation_status="ok"
duration=1 termination_type=exited termination_id=1 corefile=no
cutime=7 cstime=44
<<<test_end>>>
<<<test_start>>>
tag=rpc_pmap_unset stime=1656693411
cmdline="rpc_test.sh -c rpc_pmap_unset"
contacts=""
analysis=exit
<<<test_output>>>
rpc_test 1 TINFO: initialize 'lhost' 'ltp_ns_veth2' interface
rpc_test 1 TINFO: add local addr 10.0.0.2/24
rpc_test 1 TINFO: add local addr fd00:1:1:1::2/64
rpc_test 1 TINFO: initialize 'rhost' 'ltp_ns_veth1' interface
rpc_test 1 TINFO: add remote addr 10.0.0.1/24
rpc_test 1 TINFO: add remote addr fd00:1:1:1::1/64
rpc_test 1 TINFO: Network config (local -- remote):
rpc_test 1 TINFO: ltp_ns_veth2 -- ltp_ns_veth1
rpc_test 1 TINFO: 10.0.0.2/24 -- 10.0.0.1/24
rpc_test 1 TINFO: fd00:1:1:1::2/64 -- fd00:1:1:1::1/64
rpc_test 1 TINFO: timeout per run is 0h 5m 0s
rpc_test 1 TINFO: check registered RPC with rpcinfo
rpc_test 1 TINFO: registered RPC:
   program vers proto   port  service
    100000    4   tcp    111  portmapper
    100000    3   tcp    111  portmapper
    100000    2   tcp    111  portmapper
    100000    4   udp    111  portmapper
    100000    3   udp    111  portmapper
    100000    2   udp    111  portmapper
    100001    1   udp  44050  rstatd
    100001    2   udp  44050  rstatd
    100001    3   udp  44050  rstatd
    100001    4   udp  44050  rstatd
    100001    5   udp  44050  rstatd
    100002    2   udp  35568  rusersd
    100002    3   udp  35568  rusersd
   2000333   10   udp  51134
   2000333   10   tcp  63711
rpc_test 1 TINFO: using libtirpc: yes
rpc_test 1 TFAIL: rpc_pmap_unset 10.0.0.2 536875000 failed unexpectedly
1

Summary:
passed   0
failed   1
broken   0
skipped  0
warnings 0
<<<execution_status>>>
initiation_status="ok"
duration=0 termination_type=exited termination_id=1 corefile=no
cutime=8 cstime=44
<<<test_end>>>

...

<<<test_start>>>
tag=rpc_svc_register stime=1656693442
cmdline="rpc_test.sh -c rpc_svc_register"
contacts=""
analysis=exit
<<<test_output>>>
rpc_test 1 TINFO: initialize 'lhost' 'ltp_ns_veth2' interface
rpc_test 1 TINFO: add local addr 10.0.0.2/24
rpc_test 1 TINFO: add local addr fd00:1:1:1::2/64
rpc_test 1 TINFO: initialize 'rhost' 'ltp_ns_veth1' interface
rpc_test 1 TINFO: add remote addr 10.0.0.1/24
rpc_test 1 TINFO: add remote addr fd00:1:1:1::1/64
rpc_test 1 TINFO: Network config (local -- remote):
rpc_test 1 TINFO: ltp_ns_veth2 -- ltp_ns_veth1
rpc_test 1 TINFO: 10.0.0.2/24 -- 10.0.0.1/24
rpc_test 1 TINFO: fd00:1:1:1::2/64 -- fd00:1:1:1::1/64
rpc_test 1 TINFO: timeout per run is 0h 5m 0s
rpc_test 1 TINFO: check registered RPC with rpcinfo
rpc_test 1 TINFO: registered RPC:
   program vers proto   port  service
    100000    4   tcp    111  portmapper
    100000    3   tcp    111  portmapper
    100000    2   tcp    111  portmapper
    100000    4   udp    111  portmapper
    100000    3   udp    111  portmapper
    100000    2   udp    111  portmapper
    100001    1   udp  44050  rstatd
    100001    2   udp  44050  rstatd
    100001    3   udp  44050  rstatd
    100001    4   udp  44050  rstatd
    100001    5   udp  44050  rstatd
    100002    2   udp  35568  rusersd
    100002    3   udp  35568  rusersd
   2000333   10   udp  51134
   2000333   10   tcp  63711
rpc_test 1 TINFO: using libtirpc: yes
rpc_test 1 TFAIL: rpc_svc_register 10.0.0.2 536875000 failed unexpectedly
1

Summary:
passed   0
failed   1
broken   0
skipped  0
warnings 0
<<<execution_status>>>
initiation_status="ok"
duration=1 termination_type=exited termination_id=1 corefile=no
cutime=6 cstime=45
<<<test_end>>>

...

<<<test_start>>>
tag=rpc_registerrpc stime=1656693443
cmdline="rpc_test.sh -c rpc_registerrpc"
contacts=""
analysis=exit
<<<test_output>>>
rpc_test 1 TINFO: initialize 'lhost' 'ltp_ns_veth2' interface
rpc_test 1 TINFO: add local addr 10.0.0.2/24
rpc_test 1 TINFO: add local addr fd00:1:1:1::2/64
rpc_test 1 TINFO: initialize 'rhost' 'ltp_ns_veth1' interface
rpc_test 1 TINFO: add remote addr 10.0.0.1/24
rpc_test 1 TINFO: add remote addr fd00:1:1:1::1/64
rpc_test 1 TINFO: Network config (local -- remote):
rpc_test 1 TINFO: ltp_ns_veth2 -- ltp_ns_veth1
rpc_test 1 TINFO: 10.0.0.2/24 -- 10.0.0.1/24
rpc_test 1 TINFO: fd00:1:1:1::2/64 -- fd00:1:1:1::1/64
rpc_test 1 TINFO: timeout per run is 0h 5m 0s
rpc_test 1 TINFO: check registered RPC with rpcinfo
rpc_test 1 TINFO: registered RPC:
   program vers proto   port  service
    100000    4   tcp    111  portmapper
    100000    3   tcp    111  portmapper
    100000    2   tcp    111  portmapper
    100000    4   udp    111  portmapper
    100000    3   udp    111  portmapper
    100000    2   udp    111  portmapper
    100001    1   udp  44050  rstatd
    100001    2   udp  44050  rstatd
    100001    3   udp  44050  rstatd
    100001    4   udp  44050  rstatd
    100001    5   udp  44050  rstatd
    100002    2   udp  35568  rusersd
    100002    3   udp  35568  rusersd
   2000333   10   udp  51134
   2000333   10   tcp  63711
rpc_test 1 TINFO: using libtirpc: yes
rpc_test 1 TFAIL: rpc_registerrpc 10.0.0.2 536875000 failed unexpectedly
rpc_registerrpc: rpc_reg:  couldn't register prog 536875000 vers 1 for udp
rpc_registerrpc: rpc_reg:  couldn't register prog 536875000 vers 1 for udp6
rpc_registerrpc: rpc_reg:  cant find suitable transport for udp
1

Summary:
passed   0
failed   1
broken   0
skipped  0
warnings 0
<<<execution_status>>>
initiation_status="ok"
duration=1 termination_type=exited termination_id=1 corefile=no
cutime=8 cstime=44
<<<test_end>>>



To reproduce:

        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        sudo bin/lkp install job.yaml           # job file is attached in this email
        bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
        sudo bin/lkp run generated-yaml-file

        # if you come across any failure that blocks the test,
        # please remove the ~/.lkp and /lkp directories to run from a clean state.
diff mbox series

Patch

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index acb56e463db1..b1748c9b6db2 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -22,7 +22,6 @@  struct sock *unix_peer_get(struct sock *sk);
 
 extern unsigned int unix_tot_inflight;
 extern spinlock_t unix_table_locks[UNIX_HASH_SIZE];
-extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
 
 struct unix_address {
 	refcount_t	refcnt;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 79f8fc5cdce8..9d0b07235dbc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -120,8 +120,6 @@ 
 
 spinlock_t unix_table_locks[UNIX_HASH_SIZE];
 EXPORT_SYMBOL_GPL(unix_table_locks);
-struct hlist_head unix_socket_table[UNIX_HASH_SIZE];
-EXPORT_SYMBOL_GPL(unix_socket_table);
 static atomic_long_t unix_nr_socks;
 
 /* SMP locking strategy:
@@ -308,20 +306,20 @@  static void __unix_remove_socket(struct sock *sk)
 	sk_del_node_init(sk);
 }
 
-static void __unix_insert_socket(struct sock *sk)
+static void __unix_insert_socket(struct net *net, struct sock *sk)
 {
 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
-	sk_add_node(sk, &unix_socket_table[sk->sk_hash]);
+	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
 }
 
-static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
-				 unsigned int hash)
+static void __unix_set_addr_hash(struct net *net, struct sock *sk,
+				 struct unix_address *addr, unsigned int hash)
 {
 	__unix_remove_socket(sk);
 	smp_store_release(&unix_sk(sk)->addr, addr);
 
 	sk->sk_hash = hash;
-	__unix_insert_socket(sk);
+	__unix_insert_socket(net, sk);
 }
 
 static void unix_remove_socket(struct net *net, struct sock *sk)
@@ -337,7 +335,7 @@  static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
 {
 	spin_lock(&unix_table_locks[sk->sk_hash]);
 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
-	__unix_insert_socket(sk);
+	__unix_insert_socket(net, sk);
 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
 	spin_unlock(&unix_table_locks[sk->sk_hash]);
 }
@@ -348,12 +346,9 @@  static struct sock *__unix_find_socket_byname(struct net *net,
 {
 	struct sock *s;
 
-	sk_for_each(s, &unix_socket_table[hash]) {
+	sk_for_each(s, &net->unx.table.buckets[hash]) {
 		struct unix_sock *u = unix_sk(s);
 
-		if (!net_eq(sock_net(s), net))
-			continue;
-
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			return s;
@@ -384,7 +379,7 @@  static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 
 	spin_lock(&unix_table_locks[hash]);
 	spin_lock(&net->unx.table.locks[hash]);
-	sk_for_each(s, &unix_socket_table[hash]) {
+	sk_for_each(s, &net->unx.table.buckets[hash]) {
 		struct dentry *dentry = unix_sk(s)->path.dentry;
 
 		if (dentry && d_backing_inode(dentry) == i) {
@@ -1140,7 +1135,7 @@  static int unix_autobind(struct sock *sk)
 		goto retry;
 	}
 
-	__unix_set_addr_hash(sk, addr, new_hash);
+	__unix_set_addr_hash(net, sk, addr, new_hash);
 	unix_table_double_unlock(net, old_hash, new_hash);
 	err = 0;
 
@@ -1199,7 +1194,7 @@  static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
 	unix_table_double_lock(net, old_hash, new_hash);
 	u->path.mnt = mntget(parent.mnt);
 	u->path.dentry = dget(dentry);
-	__unix_set_addr_hash(sk, addr, new_hash);
+	__unix_set_addr_hash(net, sk, addr, new_hash);
 	unix_table_double_unlock(net, old_hash, new_hash);
 	mutex_unlock(&u->bindlock);
 	done_path_create(&parent, dentry);
@@ -1246,7 +1241,7 @@  static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
 		goto out_spin;
 
-	__unix_set_addr_hash(sk, addr, new_hash);
+	__unix_set_addr_hash(net, sk, addr, new_hash);
 	unix_table_double_unlock(net, old_hash, new_hash);
 	mutex_unlock(&u->bindlock);
 	return 0;
@@ -3239,12 +3234,11 @@  static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
 {
 	unsigned long offset = get_offset(*pos);
 	unsigned long bucket = get_bucket(*pos);
-	struct sock *sk;
 	unsigned long count = 0;
+	struct sock *sk;
 
-	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
-		if (sock_net(sk) != seq_file_net(seq))
-			continue;
+	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
+	     sk; sk = sk_next(sk)) {
 		if (++count == offset)
 			break;
 	}
@@ -3279,13 +3273,13 @@  static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
 				  loff_t *pos)
 {
 	unsigned long bucket = get_bucket(*pos);
-	struct net *net = seq_file_net(seq);
 
-	for (sk = sk_next(sk); sk; sk = sk_next(sk))
-		if (sock_net(sk) == net)
-			return sk;
+	sk = sk_next(sk);
+	if (sk)
+		return sk;
+
 
-	spin_unlock(&net->unx.table.locks[bucket]);
+	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
 	spin_unlock(&unix_table_locks[bucket]);
 
 	*pos = set_bucket_offset(++bucket, 1);
@@ -3406,7 +3400,6 @@  static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
 
 {
 	struct bpf_unix_iter_state *iter = seq->private;
-	struct net *net = seq_file_net(seq);
 	unsigned int expected = 1;
 	struct sock *sk;
 
@@ -3414,9 +3407,6 @@  static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
 	iter->batch[iter->end_sk++] = start_sk;
 
 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
-		if (sock_net(sk) != net)
-			continue;
-
 		if (iter->end_sk < iter->max_sk) {
 			sock_hold(sk);
 			iter->batch[iter->end_sk++] = sk;
@@ -3425,7 +3415,7 @@  static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
 		expected++;
 	}
 
-	spin_unlock(&net->unx.table.locks[start_sk->sk_hash]);
+	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
 	spin_unlock(&unix_table_locks[start_sk->sk_hash]);
 
 	return expected;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 7fc377435114..4d0f0ca6a1eb 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -210,9 +210,7 @@  static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		num = 0;
 		spin_lock(&unix_table_locks[slot]);
 		spin_lock(&net->unx.table.locks[slot]);
-		sk_for_each(sk, &unix_socket_table[slot]) {
-			if (!net_eq(sock_net(sk), net))
-				continue;
+		sk_for_each(sk, &net->unx.table.buckets[slot]) {
 			if (num < s_num)
 				goto next;
 			if (!(req->udiag_states & (1 << sk->sk_state)))
@@ -246,13 +244,14 @@  static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino)
 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
 		spin_lock(&unix_table_locks[i]);
 		spin_lock(&net->unx.table.locks[i]);
-		sk_for_each(sk, &unix_socket_table[i])
+		sk_for_each(sk, &net->unx.table.buckets[i]) {
 			if (ino == sock_i_ino(sk)) {
 				sock_hold(sk);
 				spin_unlock(&net->unx.table.locks[i]);
 				spin_unlock(&unix_table_locks[i]);
 				return sk;
 			}
+		}
 		spin_unlock(&net->unx.table.locks[i]);
 		spin_unlock(&unix_table_locks[i]);
 	}
@@ -277,8 +276,6 @@  static int unix_diag_get_exact(struct sk_buff *in_skb,
 	err = -ENOENT;
 	if (sk == NULL)
 		goto out_nosk;
-	if (!net_eq(sock_net(sk), net))
-		goto out;
 
 	err = sock_diag_check_cookie(sk, req->udiag_cookie);
 	if (err)