Message ID | 20231030110020.45627-13-sprasad@microsoft.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/14] cifs: print server capabilities in DebugData | expand |
nspmangalore@gmail.com writes: > From: Shyam Prasad N <sprasad@microsoft.com> > > With multichannel, it is useful to know the src port details > for each channel. This change will print the ip addr and > port details for both the socket dest and src endpoints. > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> > --- > fs/smb/client/cifs_debug.c | 73 ++++++++++++++++++++++++++++++++++++-- > 1 file changed, 71 insertions(+), 2 deletions(-) > > diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c > index e23fcabb78d6..d8362e098310 100644 > --- a/fs/smb/client/cifs_debug.c > +++ b/fs/smb/client/cifs_debug.c > @@ -13,6 +13,7 @@ > #include <linux/proc_fs.h> > #include <linux/uaccess.h> > #include <uapi/linux/ethtool.h> > +#include <net/inet_sock.h> > #include "cifspdu.h" > #include "cifsglob.h" > #include "cifsproto.h" > @@ -158,11 +159,37 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) > in_flight(server), > atomic_read(&server->in_send), > atomic_read(&server->num_waiters)); > + > #ifdef CONFIG_NET_NS > if (server->net) > seq_printf(m, " Net namespace: %u ", server->net->ns.inum); > #endif /* NET_NS */ > > +#ifdef CONFIG_CIFS_SMB_DIRECT No need to check for CONFIG_CIFS_SMB_DIRECT as @server->{rdma,smbd_conn} are always defined. > + if (!server->rdma) cifs_rdma_enabled()? To be consistent with other places. > + goto skip_rdma; > + > + if (server->smbd_conn && server->smbd_conn->id) { > + struct rdma_addr *addr = > + &server->smbd_conn->id->route.addr; > + seq_printf(m, "\n\t\tIP addr: dst: %pISpc, src: %pISpc", > + &addr->dst_addr, &addr->src_addr); > + } > + > +skip_rdma: > +#endif The goto is no longer necessary when removing above #ifdef. > + if (server->ssocket) { > + struct sockaddr src; > + int addrlen; > + > + addrlen = kernel_getsockname(server->ssocket, &src); > + if (addrlen != sizeof(struct sockaddr_in) && > + addrlen != sizeof(struct sockaddr_in6)) > + return; > + > + seq_printf(m, "\n\t\tIP addr: dst: %pISpc, src: %pISpc", > + &server->dstaddr, &src); > + } > } > > static inline const char *smb_speed_to_str(size_t bps) > @@ -279,7 +306,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) > static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > { > struct mid_q_entry *mid_entry; > - struct TCP_Server_Info *server; > + struct TCP_Server_Info *server, *nserver; > struct TCP_Server_Info *chan_server; > struct cifs_ses *ses; > struct cifs_tcon *tcon; > @@ -336,7 +363,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > > c = 0; > spin_lock(&cifs_tcp_ses_lock); > - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { > + list_for_each_entry_safe(server, nserver, &cifs_tcp_ses_list, tcp_ses_list) { > /* channel info will be printed as a part of sessions below */ > if (SERVER_IS_CHAN(server)) > continue; > @@ -414,8 +441,39 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x", > atomic_read(&server->smbd_conn->mr_ready_count), > atomic_read(&server->smbd_conn->mr_used_count)); > + if (server->smbd_conn->id) { > + struct rdma_addr *addr = > + &server->smbd_conn->id->route.addr; > + seq_printf(m, "\nIP addr: dst: %pISpc, src: %pISpc", > + &addr->dst_addr, &addr->src_addr); > + } > skip_rdma: > #endif > + if (server->ssocket) { > + struct sockaddr src; > + int addrlen; > + > + /* kernel_getsockname can block. so drop the lock first */ > + server->srv_count++; > + spin_unlock(&cifs_tcp_ses_lock); > + > + addrlen = kernel_getsockname(server->ssocket, &src); > + if (addrlen != sizeof(struct sockaddr_in) && > + addrlen != sizeof(struct sockaddr_in6)) { > + cifs_put_tcp_session(server, 0); > + spin_lock(&cifs_tcp_ses_lock); > + > + goto skip_addr_details; > + } What about addrlen = kernel_getsockname(server->ssocket, &src); if (addrlen == sizeof(struct sockaddr_in) && addrlen == sizeof(struct sockaddr_in6)) { seq_printf(m, "\nIP addr: dst: %pISpc, src: %pISpc", &server->dstaddr, &src); cifs_put_tcp_session(server, 0); spin_lock(&cifs_tcp_ses_lock); } > + > + seq_printf(m, "\nIP addr: dst: %pISpc, src: %pISpc", > + &server->dstaddr, &src); > + > + cifs_put_tcp_session(server, 0); > + spin_lock(&cifs_tcp_ses_lock); > + } > + > +skip_addr_details: Then you can get rid of this goto as well. > seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", > server->credits, > server->echo_credits, > @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > seq_printf(m, "\n\n\tExtra Channels: %zu ", > ses->chan_count-1); > for (j = 1; j < ses->chan_count; j++) { > + /* > + * kernel_getsockname can block inside > + * cifs_dump_channel. so drop the lock first > + */ > + server->srv_count++; > + spin_unlock(&cifs_tcp_ses_lock); > + > cifs_dump_channel(m, j, &ses->chans[j]); > + > + cifs_put_tcp_session(server, 0); > + spin_lock(&cifs_tcp_ses_lock); Here you are re-acquiring @cifs_tcp_ses_lock spinlock under @ses->chan_lock, which will introduce deadlocks in threads calling cifs_match_super(), cifs_signal_cifsd_for_reconnect(), cifs_mark_tcp_ses_conns_for_reconnect(), cifs_find_smb_ses(), ...
Paulo Alcantara <pc@manguebit.com> writes: >> @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) >> seq_printf(m, "\n\n\tExtra Channels: %zu ", >> ses->chan_count-1); >> for (j = 1; j < ses->chan_count; j++) { >> + /* >> + * kernel_getsockname can block inside >> + * cifs_dump_channel. so drop the lock first >> + */ >> + server->srv_count++; >> + spin_unlock(&cifs_tcp_ses_lock); >> + >> cifs_dump_channel(m, j, &ses->chans[j]); >> + >> + cifs_put_tcp_session(server, 0); >> + spin_lock(&cifs_tcp_ses_lock); > > Here you are re-acquiring @cifs_tcp_ses_lock spinlock under > @ses->chan_lock, which will introduce deadlocks in threads calling > cifs_match_super(), cifs_signal_cifsd_for_reconnect(), > cifs_mark_tcp_ses_conns_for_reconnect(), cifs_find_smb_ses(), ... A simple reproducer $ mount.cifs //srv/share /mnt -o ...,multichannel $ cat /proc/fs/cifs/DebugData [ 1293.512572] BUG: sleeping function called from invalid context at net/core/sock.c:3507 [ 1293.513915] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1068, name: cat [ 1293.515381] preempt_count: 1, expected: 0 [ 1293.516321] RCU nest depth: 0, expected: 0 [ 1293.517294] 3 locks held by cat/1068: [ 1293.518165] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 [ 1293.519383] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] [ 1293.520865] #2: ffff888011c9a540 (sk_lock-AF_INET-CIFS){+.+.}-{0:0}, at: inet_getname+0x29/0xa0 [ 1293.522098] CPU: 3 PID: 1068 Comm: cat Not tainted 6.6.0-rc7 #2 [ 1293.522901] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [ 1293.524368] Call Trace: [ 1293.524711] <TASK> [ 1293.525015] dump_stack_lvl+0x64/0x80 [ 1293.525519] __might_resched+0x173/0x280 [ 1293.526059] lock_sock_nested+0x43/0x80 [ 1293.526578] ? inet_getname+0x29/0xa0 [ 1293.527097] inet_getname+0x29/0xa0 [ 1293.527584] cifs_debug_data_proc_show+0xcf9/0xee0 [cifs] [ 1293.528360] seq_read_iter+0x118/0x470 [ 1293.528877] proc_reg_read_iter+0x53/0x90 [ 1293.529419] ? srso_alias_return_thunk+0x5/0x7f [ 1293.530037] vfs_read+0x201/0x350 [ 1293.530507] ksys_read+0x75/0x100 [ 1293.530968] do_syscall_64+0x3f/0x90 [ 1293.531461] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1293.532138] RIP: 0033:0x7f71d767e381 [ 1293.532630] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 [ 1293.535095] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 1293.536106] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 [ 1293.537056] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 [ 1293.538003] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 [ 1293.538957] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 [ 1293.539908] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 [ 1293.540877] </TASK> [ 1293.541233] [ 1293.541449] ====================================================== [ 1293.542270] WARNING: possible circular locking dependency detected [ 1293.543098] 6.6.0-rc7 #2 Tainted: G W [ 1293.543782] ------------------------------------------------------ [ 1293.544606] cat/1068 is trying to acquire lock: [ 1293.545214] ffffffffc015b5f8 (&cifs_tcp_ses_lock){+.+.}-{2:2}, at: cifs_put_tcp_session+0x1c/0x180 [cifs] [ 1293.546516] [ 1293.546516] but task is already holding lock: [ 1293.547292] ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] [ 1293.548454] [ 1293.548454] which lock already depends on the new lock. [ 1293.548454] [ 1293.549350] [ 1293.549350] the existing dependency chain (in reverse order) is: [ 1293.550183] [ 1293.550183] -> #1 (&ret_buf->chan_lock){+.+.}-{2:2}: [ 1293.550899] _raw_spin_lock+0x34/0x80 [ 1293.551401] cifs_debug_data_proc_show+0x555/0xee0 [cifs] [ 1293.552082] seq_read_iter+0x118/0x470 [ 1293.552556] proc_reg_read_iter+0x53/0x90 [ 1293.553054] vfs_read+0x201/0x350 [ 1293.553490] ksys_read+0x75/0x100 [ 1293.553925] do_syscall_64+0x3f/0x90 [ 1293.554389] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1293.555004] [ 1293.555004] -> #0 (&cifs_tcp_ses_lock){+.+.}-{2:2}: [ 1293.555709] __lock_acquire+0x1521/0x2660 [ 1293.556218] lock_acquire+0xbf/0x2b0 [ 1293.556680] _raw_spin_lock+0x34/0x80 [ 1293.557148] cifs_put_tcp_session+0x1c/0x180 [cifs] [ 1293.557773] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] [ 1293.558463] seq_read_iter+0x118/0x470 [ 1293.558945] proc_reg_read_iter+0x53/0x90 [ 1293.559450] vfs_read+0x201/0x350 [ 1293.559882] ksys_read+0x75/0x100 [ 1293.560317] do_syscall_64+0x3f/0x90 [ 1293.560773] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1293.561390] [ 1293.561390] other info that might help us debug this: [ 1293.561390] [ 1293.562267] Possible unsafe locking scenario: [ 1293.562267] [ 1293.562927] CPU0 CPU1 [ 1293.563394] ---- ---- [ 1293.563754] lock(&ret_buf->chan_lock); [ 1293.564068] lock(&cifs_tcp_ses_lock); [ 1293.564573] lock(&ret_buf->chan_lock); [ 1293.565077] lock(&cifs_tcp_ses_lock); [ 1293.565387] [ 1293.565387] *** DEADLOCK *** [ 1293.565387] [ 1293.565852] 2 locks held by cat/1068: [ 1293.566147] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 [ 1293.566767] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] [ 1293.567611] [ 1293.567611] stack backtrace: [ 1293.567954] CPU: 3 PID: 1068 Comm: cat Tainted: G W 6.6.0-rc7 #2 [ 1293.568536] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [ 1293.569387] Call Trace: [ 1293.569585] <TASK> [ 1293.569755] dump_stack_lvl+0x4a/0x80 [ 1293.570047] check_noncircular+0x14e/0x170 [ 1293.570373] ? save_trace+0x3e/0x390 [ 1293.570659] __lock_acquire+0x1521/0x2660 [ 1293.570982] lock_acquire+0xbf/0x2b0 [ 1293.571268] ? cifs_put_tcp_session+0x1c/0x180 [cifs] [ 1293.571687] _raw_spin_lock+0x34/0x80 [ 1293.571977] ? cifs_put_tcp_session+0x1c/0x180 [cifs] [ 1293.572394] cifs_put_tcp_session+0x1c/0x180 [cifs] [ 1293.572795] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] [ 1293.573241] seq_read_iter+0x118/0x470 [ 1293.573546] proc_reg_read_iter+0x53/0x90 [ 1293.573861] ? srso_alias_return_thunk+0x5/0x7f [ 1293.574218] vfs_read+0x201/0x350 [ 1293.574489] ksys_read+0x75/0x100 [ 1293.574752] do_syscall_64+0x3f/0x90 [ 1293.575030] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 1293.575428] RIP: 0033:0x7f71d767e381 [ 1293.575716] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 [ 1293.577151] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 1293.577736] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 [ 1293.578286] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 [ 1293.578839] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 [ 1293.579391] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 [ 1293.579951] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 [ 1293.580511] </TASK>
removed this patch from for-next until patch updated to address this On Wed, Nov 1, 2023 at 9:12 AM Paulo Alcantara <pc@manguebit.com> wrote: > > Paulo Alcantara <pc@manguebit.com> writes: > > >> @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > >> seq_printf(m, "\n\n\tExtra Channels: %zu ", > >> ses->chan_count-1); > >> for (j = 1; j < ses->chan_count; j++) { > >> + /* > >> + * kernel_getsockname can block inside > >> + * cifs_dump_channel. so drop the lock first > >> + */ > >> + server->srv_count++; > >> + spin_unlock(&cifs_tcp_ses_lock); > >> + > >> cifs_dump_channel(m, j, &ses->chans[j]); > >> + > >> + cifs_put_tcp_session(server, 0); > >> + spin_lock(&cifs_tcp_ses_lock); > > > > Here you are re-acquiring @cifs_tcp_ses_lock spinlock under > > @ses->chan_lock, which will introduce deadlocks in threads calling > > cifs_match_super(), cifs_signal_cifsd_for_reconnect(), > > cifs_mark_tcp_ses_conns_for_reconnect(), cifs_find_smb_ses(), ... > > A simple reproducer > > $ mount.cifs //srv/share /mnt -o ...,multichannel > $ cat /proc/fs/cifs/DebugData > > [ 1293.512572] BUG: sleeping function called from invalid context at net/core/sock.c:3507 > [ 1293.513915] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1068, name: cat > [ 1293.515381] preempt_count: 1, expected: 0 > [ 1293.516321] RCU nest depth: 0, expected: 0 > [ 1293.517294] 3 locks held by cat/1068: > [ 1293.518165] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 > [ 1293.519383] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.520865] #2: ffff888011c9a540 (sk_lock-AF_INET-CIFS){+.+.}-{0:0}, at: inet_getname+0x29/0xa0 > [ 1293.522098] CPU: 3 PID: 1068 Comm: cat Not tainted 6.6.0-rc7 #2 > [ 1293.522901] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 > [ 1293.524368] Call Trace: > [ 1293.524711] <TASK> > [ 1293.525015] dump_stack_lvl+0x64/0x80 > [ 1293.525519] __might_resched+0x173/0x280 > [ 1293.526059] lock_sock_nested+0x43/0x80 > [ 1293.526578] ? inet_getname+0x29/0xa0 > [ 1293.527097] inet_getname+0x29/0xa0 > [ 1293.527584] cifs_debug_data_proc_show+0xcf9/0xee0 [cifs] > [ 1293.528360] seq_read_iter+0x118/0x470 > [ 1293.528877] proc_reg_read_iter+0x53/0x90 > [ 1293.529419] ? srso_alias_return_thunk+0x5/0x7f > [ 1293.530037] vfs_read+0x201/0x350 > [ 1293.530507] ksys_read+0x75/0x100 > [ 1293.530968] do_syscall_64+0x3f/0x90 > [ 1293.531461] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.532138] RIP: 0033:0x7f71d767e381 > [ 1293.532630] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 > [ 1293.535095] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 > [ 1293.536106] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 > [ 1293.537056] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 > [ 1293.538003] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 > [ 1293.538957] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 > [ 1293.539908] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 > [ 1293.540877] </TASK> > [ 1293.541233] > [ 1293.541449] ====================================================== > [ 1293.542270] WARNING: possible circular locking dependency detected > [ 1293.543098] 6.6.0-rc7 #2 Tainted: G W > [ 1293.543782] ------------------------------------------------------ > [ 1293.544606] cat/1068 is trying to acquire lock: > [ 1293.545214] ffffffffc015b5f8 (&cifs_tcp_ses_lock){+.+.}-{2:2}, at: cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.546516] > [ 1293.546516] but task is already holding lock: > [ 1293.547292] ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.548454] > [ 1293.548454] which lock already depends on the new lock. > [ 1293.548454] > [ 1293.549350] > [ 1293.549350] the existing dependency chain (in reverse order) is: > [ 1293.550183] > [ 1293.550183] -> #1 (&ret_buf->chan_lock){+.+.}-{2:2}: > [ 1293.550899] _raw_spin_lock+0x34/0x80 > [ 1293.551401] cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.552082] seq_read_iter+0x118/0x470 > [ 1293.552556] proc_reg_read_iter+0x53/0x90 > [ 1293.553054] vfs_read+0x201/0x350 > [ 1293.553490] ksys_read+0x75/0x100 > [ 1293.553925] do_syscall_64+0x3f/0x90 > [ 1293.554389] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.555004] > [ 1293.555004] -> #0 (&cifs_tcp_ses_lock){+.+.}-{2:2}: > [ 1293.555709] __lock_acquire+0x1521/0x2660 > [ 1293.556218] lock_acquire+0xbf/0x2b0 > [ 1293.556680] _raw_spin_lock+0x34/0x80 > [ 1293.557148] cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.557773] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] > [ 1293.558463] seq_read_iter+0x118/0x470 > [ 1293.558945] proc_reg_read_iter+0x53/0x90 > [ 1293.559450] vfs_read+0x201/0x350 > [ 1293.559882] ksys_read+0x75/0x100 > [ 1293.560317] do_syscall_64+0x3f/0x90 > [ 1293.560773] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.561390] > [ 1293.561390] other info that might help us debug this: > [ 1293.561390] > [ 1293.562267] Possible unsafe locking scenario: > [ 1293.562267] > [ 1293.562927] CPU0 CPU1 > [ 1293.563394] ---- ---- > [ 1293.563754] lock(&ret_buf->chan_lock); > [ 1293.564068] lock(&cifs_tcp_ses_lock); > [ 1293.564573] lock(&ret_buf->chan_lock); > [ 1293.565077] lock(&cifs_tcp_ses_lock); > [ 1293.565387] > [ 1293.565387] *** DEADLOCK *** > [ 1293.565387] > [ 1293.565852] 2 locks held by cat/1068: > [ 1293.566147] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 > [ 1293.566767] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.567611] > [ 1293.567611] stack backtrace: > [ 1293.567954] CPU: 3 PID: 1068 Comm: cat Tainted: G W 6.6.0-rc7 #2 > [ 1293.568536] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 > [ 1293.569387] Call Trace: > [ 1293.569585] <TASK> > [ 1293.569755] dump_stack_lvl+0x4a/0x80 > [ 1293.570047] check_noncircular+0x14e/0x170 > [ 1293.570373] ? save_trace+0x3e/0x390 > [ 1293.570659] __lock_acquire+0x1521/0x2660 > [ 1293.570982] lock_acquire+0xbf/0x2b0 > [ 1293.571268] ? cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.571687] _raw_spin_lock+0x34/0x80 > [ 1293.571977] ? cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.572394] cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.572795] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] > [ 1293.573241] seq_read_iter+0x118/0x470 > [ 1293.573546] proc_reg_read_iter+0x53/0x90 > [ 1293.573861] ? srso_alias_return_thunk+0x5/0x7f > [ 1293.574218] vfs_read+0x201/0x350 > [ 1293.574489] ksys_read+0x75/0x100 > [ 1293.574752] do_syscall_64+0x3f/0x90 > [ 1293.575030] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.575428] RIP: 0033:0x7f71d767e381 > [ 1293.575716] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 > [ 1293.577151] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 > [ 1293.577736] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 > [ 1293.578286] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 > [ 1293.578839] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 > [ 1293.579391] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 > [ 1293.579951] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 > [ 1293.580511] </TASK>
On Wed, Nov 1, 2023 at 7:42 PM Paulo Alcantara <pc@manguebit.com> wrote: > > Paulo Alcantara <pc@manguebit.com> writes: > > >> @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) > >> seq_printf(m, "\n\n\tExtra Channels: %zu ", > >> ses->chan_count-1); > >> for (j = 1; j < ses->chan_count; j++) { > >> + /* > >> + * kernel_getsockname can block inside > >> + * cifs_dump_channel. so drop the lock first > >> + */ > >> + server->srv_count++; > >> + spin_unlock(&cifs_tcp_ses_lock); > >> + > >> cifs_dump_channel(m, j, &ses->chans[j]); > >> + > >> + cifs_put_tcp_session(server, 0); > >> + spin_lock(&cifs_tcp_ses_lock); > > > > Here you are re-acquiring @cifs_tcp_ses_lock spinlock under > > @ses->chan_lock, which will introduce deadlocks in threads calling > > cifs_match_super(), cifs_signal_cifsd_for_reconnect(), > > cifs_mark_tcp_ses_conns_for_reconnect(), cifs_find_smb_ses(), ... > Good points. I'm just wondering why I'm unable to repro the same though. I have lockdep enabled on my kernel too. But the same steps do not throw this warning. > A simple reproducer > > $ mount.cifs //srv/share /mnt -o ...,multichannel > $ cat /proc/fs/cifs/DebugData > > [ 1293.512572] BUG: sleeping function called from invalid context at net/core/sock.c:3507 > [ 1293.513915] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1068, name: cat > [ 1293.515381] preempt_count: 1, expected: 0 > [ 1293.516321] RCU nest depth: 0, expected: 0 > [ 1293.517294] 3 locks held by cat/1068: > [ 1293.518165] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 > [ 1293.519383] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.520865] #2: ffff888011c9a540 (sk_lock-AF_INET-CIFS){+.+.}-{0:0}, at: inet_getname+0x29/0xa0 > [ 1293.522098] CPU: 3 PID: 1068 Comm: cat Not tainted 6.6.0-rc7 #2 > [ 1293.522901] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 > [ 1293.524368] Call Trace: > [ 1293.524711] <TASK> > [ 1293.525015] dump_stack_lvl+0x64/0x80 > [ 1293.525519] __might_resched+0x173/0x280 > [ 1293.526059] lock_sock_nested+0x43/0x80 > [ 1293.526578] ? inet_getname+0x29/0xa0 > [ 1293.527097] inet_getname+0x29/0xa0 > [ 1293.527584] cifs_debug_data_proc_show+0xcf9/0xee0 [cifs] > [ 1293.528360] seq_read_iter+0x118/0x470 > [ 1293.528877] proc_reg_read_iter+0x53/0x90 > [ 1293.529419] ? srso_alias_return_thunk+0x5/0x7f > [ 1293.530037] vfs_read+0x201/0x350 > [ 1293.530507] ksys_read+0x75/0x100 > [ 1293.530968] do_syscall_64+0x3f/0x90 > [ 1293.531461] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.532138] RIP: 0033:0x7f71d767e381 > [ 1293.532630] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 > [ 1293.535095] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 > [ 1293.536106] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 > [ 1293.537056] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 > [ 1293.538003] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 > [ 1293.538957] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 > [ 1293.539908] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 > [ 1293.540877] </TASK> > [ 1293.541233] > [ 1293.541449] ====================================================== > [ 1293.542270] WARNING: possible circular locking dependency detected > [ 1293.543098] 6.6.0-rc7 #2 Tainted: G W > [ 1293.543782] ------------------------------------------------------ > [ 1293.544606] cat/1068 is trying to acquire lock: > [ 1293.545214] ffffffffc015b5f8 (&cifs_tcp_ses_lock){+.+.}-{2:2}, at: cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.546516] > [ 1293.546516] but task is already holding lock: > [ 1293.547292] ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.548454] > [ 1293.548454] which lock already depends on the new lock. > [ 1293.548454] > [ 1293.549350] > [ 1293.549350] the existing dependency chain (in reverse order) is: > [ 1293.550183] > [ 1293.550183] -> #1 (&ret_buf->chan_lock){+.+.}-{2:2}: > [ 1293.550899] _raw_spin_lock+0x34/0x80 > [ 1293.551401] cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.552082] seq_read_iter+0x118/0x470 > [ 1293.552556] proc_reg_read_iter+0x53/0x90 > [ 1293.553054] vfs_read+0x201/0x350 > [ 1293.553490] ksys_read+0x75/0x100 > [ 1293.553925] do_syscall_64+0x3f/0x90 > [ 1293.554389] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.555004] > [ 1293.555004] -> #0 (&cifs_tcp_ses_lock){+.+.}-{2:2}: > [ 1293.555709] __lock_acquire+0x1521/0x2660 > [ 1293.556218] lock_acquire+0xbf/0x2b0 > [ 1293.556680] _raw_spin_lock+0x34/0x80 > [ 1293.557148] cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.557773] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] > [ 1293.558463] seq_read_iter+0x118/0x470 > [ 1293.558945] proc_reg_read_iter+0x53/0x90 > [ 1293.559450] vfs_read+0x201/0x350 > [ 1293.559882] ksys_read+0x75/0x100 > [ 1293.560317] do_syscall_64+0x3f/0x90 > [ 1293.560773] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.561390] > [ 1293.561390] other info that might help us debug this: > [ 1293.561390] > [ 1293.562267] Possible unsafe locking scenario: > [ 1293.562267] > [ 1293.562927] CPU0 CPU1 > [ 1293.563394] ---- ---- > [ 1293.563754] lock(&ret_buf->chan_lock); > [ 1293.564068] lock(&cifs_tcp_ses_lock); > [ 1293.564573] lock(&ret_buf->chan_lock); > [ 1293.565077] lock(&cifs_tcp_ses_lock); > [ 1293.565387] > [ 1293.565387] *** DEADLOCK *** > [ 1293.565387] > [ 1293.565852] 2 locks held by cat/1068: > [ 1293.566147] #0: ffff88800818fc48 (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x59/0x470 > [ 1293.566767] #1: ffff88800aed2b28 (&ret_buf->chan_lock){+.+.}-{2:2}, at: cifs_debug_data_proc_show+0x555/0xee0 [cifs] > [ 1293.567611] > [ 1293.567611] stack backtrace: > [ 1293.567954] CPU: 3 PID: 1068 Comm: cat Tainted: G W 6.6.0-rc7 #2 > [ 1293.568536] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 > [ 1293.569387] Call Trace: > [ 1293.569585] <TASK> > [ 1293.569755] dump_stack_lvl+0x4a/0x80 > [ 1293.570047] check_noncircular+0x14e/0x170 > [ 1293.570373] ? save_trace+0x3e/0x390 > [ 1293.570659] __lock_acquire+0x1521/0x2660 > [ 1293.570982] lock_acquire+0xbf/0x2b0 > [ 1293.571268] ? cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.571687] _raw_spin_lock+0x34/0x80 > [ 1293.571977] ? cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.572394] cifs_put_tcp_session+0x1c/0x180 [cifs] > [ 1293.572795] cifs_debug_data_proc_show+0xd15/0xee0 [cifs] > [ 1293.573241] seq_read_iter+0x118/0x470 > [ 1293.573546] proc_reg_read_iter+0x53/0x90 > [ 1293.573861] ? srso_alias_return_thunk+0x5/0x7f > [ 1293.574218] vfs_read+0x201/0x350 > [ 1293.574489] ksys_read+0x75/0x100 > [ 1293.574752] do_syscall_64+0x3f/0x90 > [ 1293.575030] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 > [ 1293.575428] RIP: 0033:0x7f71d767e381 > [ 1293.575716] Code: ff ff eb c3 e8 0e ea 01 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 90 90 80 3d a5 f6 0e 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 > [ 1293.577151] RSP: 002b:00007ffc312d65a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 > [ 1293.577736] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f71d767e381 > [ 1293.578286] RDX: 0000000000020000 RSI: 00007f71d74f8000 RDI: 0000000000000003 > [ 1293.578839] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 > [ 1293.579391] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f71d74f8000 > [ 1293.579951] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000020000 > [ 1293.580511] </TASK>
Hi Shyam, Shyam Prasad N <nspmangalore@gmail.com> writes: > On Wed, Nov 1, 2023 at 7:42 PM Paulo Alcantara <pc@manguebit.com> wrote: >> >> Paulo Alcantara <pc@manguebit.com> writes: >> >> >> @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) >> >> seq_printf(m, "\n\n\tExtra Channels: %zu ", >> >> ses->chan_count-1); >> >> for (j = 1; j < ses->chan_count; j++) { >> >> + /* >> >> + * kernel_getsockname can block inside >> >> + * cifs_dump_channel. so drop the lock first >> >> + */ >> >> + server->srv_count++; >> >> + spin_unlock(&cifs_tcp_ses_lock); >> >> + >> >> cifs_dump_channel(m, j, &ses->chans[j]); >> >> + >> >> + cifs_put_tcp_session(server, 0); >> >> + spin_lock(&cifs_tcp_ses_lock); >> > >> > Here you are re-acquiring @cifs_tcp_ses_lock spinlock under >> > @ses->chan_lock, which will introduce deadlocks in threads calling >> > cifs_match_super(), cifs_signal_cifsd_for_reconnect(), >> > cifs_mark_tcp_ses_conns_for_reconnect(), cifs_find_smb_ses(), ... >> > > Good points. > I'm just wondering why I'm unable to repro the same though. > I have lockdep enabled on my kernel too. But the same steps do not > throw this warning. That's weird as I can always reproduce it when mounting a Windows Server 2022 share with 'username=foo,password=bar,multichannel' options followed by `cat /proc/fs/cifs/DebugData`. Here is the relevant part of my .config # # Lock Debugging (spinlocks, mutexes, etc...) # CONFIG_LOCK_DEBUGGING_SUPPORT=y CONFIG_PROVE_LOCKING=y # CONFIG_PROVE_RAW_LOCK_NESTING is not set CONFIG_LOCK_STAT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_DEBUG_RWSEMS=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_LOCKDEP=y CONFIG_LOCKDEP_BITS=15 CONFIG_LOCKDEP_CHAINS_BITS=16 CONFIG_LOCKDEP_STACK_TRACE_BITS=19 CONFIG_LOCKDEP_STACK_TRACE_HASH_BITS=14 CONFIG_LOCKDEP_CIRCULAR_QUEUE_BITS=12 # CONFIG_DEBUG_LOCKDEP is not set CONFIG_DEBUG_ATOMIC_SLEEP=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index e23fcabb78d6..d8362e098310 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -13,6 +13,7 @@ #include <linux/proc_fs.h> #include <linux/uaccess.h> #include <uapi/linux/ethtool.h> +#include <net/inet_sock.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -158,11 +159,37 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) in_flight(server), atomic_read(&server->in_send), atomic_read(&server->num_waiters)); + #ifdef CONFIG_NET_NS if (server->net) seq_printf(m, " Net namespace: %u ", server->net->ns.inum); #endif /* NET_NS */ +#ifdef CONFIG_CIFS_SMB_DIRECT + if (!server->rdma) + goto skip_rdma; + + if (server->smbd_conn && server->smbd_conn->id) { + struct rdma_addr *addr = + &server->smbd_conn->id->route.addr; + seq_printf(m, "\n\t\tIP addr: dst: %pISpc, src: %pISpc", + &addr->dst_addr, &addr->src_addr); + } + +skip_rdma: +#endif + if (server->ssocket) { + struct sockaddr src; + int addrlen; + + addrlen = kernel_getsockname(server->ssocket, &src); + if (addrlen != sizeof(struct sockaddr_in) && + addrlen != sizeof(struct sockaddr_in6)) + return; + + seq_printf(m, "\n\t\tIP addr: dst: %pISpc, src: %pISpc", + &server->dstaddr, &src); + } } static inline const char *smb_speed_to_str(size_t bps) @@ -279,7 +306,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct mid_q_entry *mid_entry; - struct TCP_Server_Info *server; + struct TCP_Server_Info *server, *nserver; struct TCP_Server_Info *chan_server; struct cifs_ses *ses; struct cifs_tcon *tcon; @@ -336,7 +363,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) c = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry_safe(server, nserver, &cifs_tcp_ses_list, tcp_ses_list) { /* channel info will be printed as a part of sessions below */ if (SERVER_IS_CHAN(server)) continue; @@ -414,8 +441,39 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x", atomic_read(&server->smbd_conn->mr_ready_count), atomic_read(&server->smbd_conn->mr_used_count)); + if (server->smbd_conn->id) { + struct rdma_addr *addr = + &server->smbd_conn->id->route.addr; + seq_printf(m, "\nIP addr: dst: %pISpc, src: %pISpc", + &addr->dst_addr, &addr->src_addr); + } skip_rdma: #endif + if (server->ssocket) { + struct sockaddr src; + int addrlen; + + /* kernel_getsockname can block. so drop the lock first */ + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + + addrlen = kernel_getsockname(server->ssocket, &src); + if (addrlen != sizeof(struct sockaddr_in) && + addrlen != sizeof(struct sockaddr_in6)) { + cifs_put_tcp_session(server, 0); + spin_lock(&cifs_tcp_ses_lock); + + goto skip_addr_details; + } + + seq_printf(m, "\nIP addr: dst: %pISpc, src: %pISpc", + &server->dstaddr, &src); + + cifs_put_tcp_session(server, 0); + spin_lock(&cifs_tcp_ses_lock); + } + +skip_addr_details: seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", server->credits, server->echo_credits, @@ -515,7 +573,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n\n\tExtra Channels: %zu ", ses->chan_count-1); for (j = 1; j < ses->chan_count; j++) { + /* + * kernel_getsockname can block inside + * cifs_dump_channel. so drop the lock first + */ + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + cifs_dump_channel(m, j, &ses->chans[j]); + + cifs_put_tcp_session(server, 0); + spin_lock(&cifs_tcp_ses_lock); + if (CIFS_CHAN_NEEDS_RECONNECT(ses, j)) seq_puts(m, "\tDISCONNECTED "); if (CIFS_CHAN_IN_RECONNECT(ses, j))