@@ -655,6 +655,14 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
+
mutex_lock(&psock->work_mutex);
if (unlikely(state->len)) {
len = state->len;
@@ -702,6 +710,7 @@ static void sk_psock_backlog(struct work_struct *work)
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
The sk->sk_socket is not locked or referenced, and during the call to skb_send_sock(), there is a race condition with the release of sk_socket. All types of sockets(tcp/udp/unix/vsock) will be affected. Race conditions: ''' CPU0 CPU1 skb_send_sock sendmsg_unlocked sock_sendmsg sock_sendmsg_nosec close(fd): ... ops->release() sock_map_close() sk_socket->ops = NULL free(socket) sock->ops->sendmsg ^ panic here ''' Based on the fact that we already wait for the workqueue to finish in sock_map_close() if psock is held, we simply increase the psock reference count to avoid race conditions. ''' void sock_map_close() { ... if (likely(psock)) { ... psock = sk_psock_get(sk); if (unlikely(!psock)) goto no_psock; <=== Control usually jumps here via goto ... cancel_delayed_work_sync(&psock->work); <=== not executed sk_psock_put(sk, psock); ... } ''' The panic I catched: ''' Workqueue: events sk_psock_backlog RIP: 0010:sock_sendmsg+0x21d/0x440 RAX: 0000000000000000 RBX: ffffc9000521fad8 RCX: 0000000000000001 ... Call Trace: <TASK> ? die_addr+0x40/0xa0 ? exc_general_protection+0x14c/0x230 ? asm_exc_general_protection+0x26/0x30 ? sock_sendmsg+0x21d/0x440 ? sock_sendmsg+0x3e0/0x440 ? __pfx_sock_sendmsg+0x10/0x10 __skb_send_sock+0x543/0xb70 sk_psock_backlog+0x247/0xb80 ... ''' Reported-by: Michal Luczaj <mhal@rbox.co> Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev> --- Some approach I tried 1. add rcu: - RCU conflicts with mutex_lock in Unix socket send path. - Race conditions still exist when reading sk->sk_socket->ops for in current sock_sendmsg implementation. 2. Increased the reference of sk_socket->file: - If the user calls close(fd), we will do nothing because the reference count is not set to 0. It's unexpected. 3. Use sock_lock when calling skb_send_sock: - skb_send_sock itself already do the locking. - If we call skb_send_sock_locked instead, we have to implement sendmsg_locked for each protocol, which is not easy for UDP or Unix, as the sending process involves frequent locking and unlocking, which makes it challenging to isolate the locking logic. --- net/core/skmsg.c | 9 +++++++++ 1 file changed, 9 insertions(+)