Message ID | 92930a7b962611507f4bb87671d97f526ece5952.1705427537.git.pabeni@redhat.com (mailing list archive) |
---|---|
State | Accepted, archived |
Commit | 483c63a2857c3c1e770580a2077f95df2315c3fd |
Delegated to: | Matthieu Baerts |
Headers | show |
Series | mptcp: annotate lockless access | expand |
Context | Check | Description |
---|---|---|
matttbe/build | success | Build and static analysis OK |
matttbe/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 59 lines checked |
matttbe/KVM_Validation__normal | warning | Unstable: 3 failed test(s): packetdrill_regressions packetdrill_syscalls selftest_mptcp_join |
matttbe/KVM_Validation__debug__except_selftest_mptcp_join_ | success | Success! ✅ |
matttbe/KVM_Validation__debug__only_selftest_mptcp_join_ | success | Success! ✅ |
On Tue, 16 Jan 2024, Paolo Abeni wrote: > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under > the msk socket lock protection, and are accessed lockless in a few spots. > > Always mark the write operations with WRITE_ONCE, read operations > outside the lock with READ_ONCE and drop the annotation for read > under such lock. > > To simplify the annotations move mptcp_pending_data_fin_ack() from > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket > lock, where such call would belong. > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > --- > net/mptcp/options.c | 2 +- > net/mptcp/protocol.c | 14 ++++++-------- > net/mptcp/protocol.h | 2 +- > 3 files changed, 8 insertions(+), 10 deletions(-) > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index cebbd0bc32aa..51b00d7e7c89 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk, > msk->wnd_end = new_wnd_end; > > /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ > - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) > + if (after64(msk->wnd_end, snd_nxt)) Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before the data_lock is acquired. 
- Mat > __mptcp_check_push(sk, ssk); > > if (after64(new_snd_una, old_snd_una)) { > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 9ad672a10c11..679d4576d2c1 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1033,13 +1033,14 @@ static void __mptcp_clean_una(struct sock *sk) > msk->recovery = false; > > out: > - if (snd_una == READ_ONCE(msk->snd_nxt) && > - snd_una == READ_ONCE(msk->write_seq)) { > + if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) { > if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) > mptcp_stop_rtx_timer(sk); > } else { > mptcp_reset_rtx_timer(sk); > } > + if (mptcp_pending_data_fin_ack(sk)) > + mptcp_schedule_work(sk); > } > > static void __mptcp_clean_una_wakeup(struct sock *sk) > @@ -1499,7 +1500,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, > */ > if (likely(after64(snd_nxt_new, msk->snd_nxt))) { > msk->bytes_sent += snd_nxt_new - msk->snd_nxt; > - msk->snd_nxt = snd_nxt_new; > + WRITE_ONCE(msk->snd_nxt, snd_nxt_new); > } > } > > @@ -3210,8 +3211,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, > if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) > WRITE_ONCE(msk->csum_enabled, true); > > - msk->write_seq = subflow_req->idsn + 1; > - msk->snd_nxt = msk->write_seq; > + WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); > + WRITE_ONCE(msk->snd_nxt, msk->write_seq); > msk->snd_una = msk->write_seq; > msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; > msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; > @@ -3316,9 +3317,6 @@ void __mptcp_data_acked(struct sock *sk) > __mptcp_clean_una(sk); > else > __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); > - > - if (mptcp_pending_data_fin_ack(sk)) > - mptcp_schedule_work(sk); > } > > void __mptcp_check_push(struct sock *sk, struct sock *ssk) > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index 3af85643328e..d05ec76dd7c2 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -402,7 
+402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) > { > struct mptcp_sock *msk = mptcp_sk(sk); > > - if (msk->snd_una == READ_ONCE(msk->snd_nxt)) > + if (msk->snd_una == msk->snd_nxt) > return NULL; > > return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); > -- > 2.43.0 > > >
On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote: > On Tue, 16 Jan 2024, Paolo Abeni wrote: > > > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under > > the msk socket lock protection, and are accessed lockless in a few spots. > > > > Always mark the write operations with WRITE_ONCE, read operations > > outside the lock with READ_ONCE and drop the annotation for read > > under such lock. > > > > To simplify the annotations move mptcp_pending_data_fin_ack() from > > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket > > lock, where such call would belong. > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > --- > > net/mptcp/options.c | 2 +- > > net/mptcp/protocol.c | 14 ++++++-------- > > net/mptcp/protocol.h | 2 +- > > 3 files changed, 8 insertions(+), 10 deletions(-) > > > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > > index cebbd0bc32aa..51b00d7e7c89 100644 > > --- a/net/mptcp/options.c > > +++ b/net/mptcp/options.c > > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk, > > msk->wnd_end = new_wnd_end; > > > > /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ > > - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) > > + if (after64(msk->wnd_end, snd_nxt)) > > Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before > the data_lock is acquired. You are right, READ_ONCE is required here. This is outside the msk socket lock. I'll send a v2. Thanks! Paolo
On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote: > On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote: > > On Tue, 16 Jan 2024, Paolo Abeni wrote: > > > > > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under > > > the msk socket lock protection, and are accessed lockless in a few spots. > > > > > > Always mark the write operations with WRITE_ONCE, read operations > > > outside the lock with READ_ONCE and drop the annotation for read > > > under such lock. > > > > > > To simplify the annotations move mptcp_pending_data_fin_ack() from > > > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket > > > lock, where such call would belong. > > > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > > --- > > > net/mptcp/options.c | 2 +- > > > net/mptcp/protocol.c | 14 ++++++-------- > > > net/mptcp/protocol.h | 2 +- > > > 3 files changed, 8 insertions(+), 10 deletions(-) > > > > > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > > > index cebbd0bc32aa..51b00d7e7c89 100644 > > > --- a/net/mptcp/options.c > > > +++ b/net/mptcp/options.c > > > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk, > > > msk->wnd_end = new_wnd_end; > > > > > > /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ > > > - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) > > > + if (after64(msk->wnd_end, snd_nxt)) > > > > Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before > > the data_lock is acquired. > > You are right, READ_ONCE is required here. This is outside the msk > socket lock. I'll send a v2. Oops, sorry I was too hasty. After a better re-read I think this patch is correct. We don't need to re-read snd_nxt inside the data_lock: - the 'new' value will still _not_ be any more accurate, as the lock protecting such lock is the msk _socket_ lock. 
- we don't have any data dependency with other msk fields The READ_ONCE annotation is there just to make the fuzzer happy and avoid reporting data races, we don't need to issue the read operation multiple times. Please let me know if the above makes sense to you! Cheers, Paolo
On Mon, 22 Jan 2024, Paolo Abeni wrote: > On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote: >> On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote: >>> On Tue, 16 Jan 2024, Paolo Abeni wrote: >>> >>>> The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under >>>> the msk socket lock protection, and are accessed lockless in a few spots. >>>> >>>> Always mark the write operations with WRITE_ONCE, read operations >>>> outside the lock with READ_ONCE and drop the annotation for read >>>> under such lock. >>>> >>>> To simplify the annotations move mptcp_pending_data_fin_ack() from >>>> __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket >>>> lock, where such call would belong. >>>> >>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com> >>>> --- >>>> net/mptcp/options.c | 2 +- >>>> net/mptcp/protocol.c | 14 ++++++-------- >>>> net/mptcp/protocol.h | 2 +- >>>> 3 files changed, 8 insertions(+), 10 deletions(-) >>>> >>>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c >>>> index cebbd0bc32aa..51b00d7e7c89 100644 >>>> --- a/net/mptcp/options.c >>>> +++ b/net/mptcp/options.c >>>> @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk, >>>> msk->wnd_end = new_wnd_end; >>>> >>>> /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ >>>> - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) >>>> + if (after64(msk->wnd_end, snd_nxt)) >>> >>> Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before >>> the data_lock is acquired. >> >> You are right, READ_ONCE is required here. This is outside the msk >> socket lock. I'll send a v2. > > Oops, sorry I was too hasty. After a better re-read I think this patch > is correct. > > We don't need to re-read snd_nxt inside the data_lock: > - the 'new' value will still _not_ be any more accurate, as the lock > protecting such lock is the msk _socket_ lock. 
> - we don't have any data dependency with others msk fields > > The READ_ONCE annotation is there just to make the fuzzer happy and > avoid reporting data races, we don't need to issue the read operation > multiple time. > > Please let me know if the above makes sense to you! > Yeah, that makes sense. Also, the only (incredibly rare) consequence of a stale snd_nxt here could be an unnecessary call to __mptcp_check_push(), which is not a problem. - Mat
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index cebbd0bc32aa..51b00d7e7c89 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk, msk->wnd_end = new_wnd_end; /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) + if (after64(msk->wnd_end, snd_nxt)) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9ad672a10c11..679d4576d2c1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1033,13 +1033,14 @@ static void __mptcp_clean_una(struct sock *sk) msk->recovery = false; out: - if (snd_una == READ_ONCE(msk->snd_nxt) && - snd_una == READ_ONCE(msk->write_seq)) { + if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) { if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) mptcp_stop_rtx_timer(sk); } else { mptcp_reset_rtx_timer(sk); } + if (mptcp_pending_data_fin_ack(sk)) + mptcp_schedule_work(sk); } static void __mptcp_clean_una_wakeup(struct sock *sk) @@ -1499,7 +1500,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, */ if (likely(after64(snd_nxt_new, msk->snd_nxt))) { msk->bytes_sent += snd_nxt_new - msk->snd_nxt; - msk->snd_nxt = snd_nxt_new; + WRITE_ONCE(msk->snd_nxt, snd_nxt_new); } } @@ -3210,8 +3211,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); - msk->write_seq = subflow_req->idsn + 1; - msk->snd_nxt = msk->write_seq; + WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); msk->snd_una = msk->write_seq; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; @@ -3316,9 +3317,6 @@ void __mptcp_data_acked(struct sock *sk) __mptcp_clean_una(sk); else __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); - - 
if (mptcp_pending_data_fin_ack(sk)) - mptcp_schedule_work(sk); } void __mptcp_check_push(struct sock *sk, struct sock *ssk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3af85643328e..d05ec76dd7c2 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -402,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (msk->snd_una == READ_ONCE(msk->snd_nxt)) + if (msk->snd_una == msk->snd_nxt) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under the msk socket lock protection, and are accessed lockless in a few spots. Always mark the write operations with WRITE_ONCE, read operations outside the lock with READ_ONCE and drop the annotation for read under such lock. To simplify the annotations move mptcp_pending_data_fin_ack() from __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket lock, where such call would belong. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 2 +- net/mptcp/protocol.c | 14 ++++++-------- net/mptcp/protocol.h | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-)