@@ -797,7 +797,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
- if (likely(subflow->pm_notified))
+ /* if the subflow is not already linked into the conn_list, we can't
+ * notify the PM: this subflow is still on the listener queue
+ * and the PM possibly acquiring the subflow lock could race with
+ * the listener close
+ */
+ if (likely(subflow->pm_notified) || list_empty(&subflow->node))
return true;
subflow->pm_notified = 1;
@@ -111,8 +111,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
spin_lock_bh(&pm->lock);
- if (READ_ONCE(pm->work_pending))
+ /* mptcp_pm_fully_established() can be invoked by multiple
+ * racing paths - accept() and check_fully_established()
+ * be sure to serve this event only once.
+ */
+ if (READ_ONCE(pm->work_pending) &&
+ !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
+ msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
@@ -3208,6 +3208,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
bool slowpath;
slowpath = lock_sock_fast(newsk);
+
+ /* PM/worker can now acquire the first subflow socket
+ * lock without racing with listener queue cleanup,
+ * we can notify it, if needed.
+ */
+ subflow = mptcp_subflow_ctx(msk->first);
+ list_add(&subflow->node, &msk->conn_list);
+ sock_hold(msk->first);
+ if (mptcp_is_fully_established(newsk))
+ mptcp_pm_fully_established(msk);
+
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
@@ -165,6 +165,7 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
+ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED,
};
@@ -614,8 +614,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
*/
inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
- /* link the newly created socket to the msk */
- mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
+ /* record the newly created socket as the first msk
+ * subflow, but don't link it yet into conn_list
+ */
WRITE_ONCE(mptcp_sk(new_msk)->first, child);
/* new mpc subflow takes ownership of the newly
@@ -1148,13 +1149,18 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr);
+ mptcp_add_pending_subflow(msk, subflow);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
- goto failed;
+ goto failed_unlink;
- mptcp_add_pending_subflow(msk, subflow);
return err;
+failed_unlink:
+ spin_lock_bh(&msk->join_list_lock);
+ list_del(&subflow->node);
+ spin_unlock_bh(&msk->join_list_lock);
+
failed:
subflow->disposable = 1;
sock_release(sf);