
[mptcp-next,2/5] mptcp: annotate lockless access for the tx path

Message ID 92930a7b962611507f4bb87671d97f526ece5952.1705427537.git.pabeni@redhat.com (mailing list archive)
State Accepted, archived
Commit 483c63a2857c3c1e770580a2077f95df2315c3fd
Delegated to: Matthieu Baerts
Series mptcp: annotate lockless access

Checks

Context Check Description
matttbe/build success Build and static analysis OK
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 59 lines checked
matttbe/KVM_Validation__normal warning Unstable: 3 failed test(s): packetdrill_regressions packetdrill_syscalls selftest_mptcp_join
matttbe/KVM_Validation__debug__except_selftest_mptcp_join_ success Success! ✅
matttbe/KVM_Validation__debug__only_selftest_mptcp_join_ success Success! ✅

Commit Message

Paolo Abeni Jan. 16, 2024, 6:16 p.m. UTC
The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) is under
the msk socket lock protection, and is accessed locklessly in a few spots.

Always mark write operations with WRITE_ONCE and reads outside the lock
with READ_ONCE, and drop the annotations for reads done under such lock.

To simplify the annotations, move the mptcp_pending_data_fin_ack() call
from __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
lock, where such a call belongs.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/options.c  |  2 +-
 net/mptcp/protocol.c | 14 ++++++--------
 net/mptcp/protocol.h |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)
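
For context, the rule the patch applies is the usual one for a field that is
owned by a single lock but also read locklessly elsewhere: stores are wrapped
in WRITE_ONCE() even though the owning lock is held, lockless loads are
wrapped in READ_ONCE(), and loads performed while holding the owning lock
stay plain. A minimal userspace sketch of that pattern follows; the shims,
struct and function names are illustrative only, not the MPTCP code, and the
plain ">" comparison stands in for the wraparound-safe after64() used in the
real implementation.

#include <pthread.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel annotations, for illustration only. */
#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

struct tx_state {
	pthread_mutex_t owner_lock;	/* plays the role of the msk socket lock */
	uint64_t snd_nxt;		/* written only with owner_lock held */
};

/* Writer side: the owning lock is held around the update, but the store is
 * still marked because lockless readers exist elsewhere. */
static void advance_snd_nxt(struct tx_state *s, uint64_t snd_nxt_new)
{
	pthread_mutex_lock(&s->owner_lock);
	if (snd_nxt_new > s->snd_nxt)		/* plain read: lock is held */
		WRITE_ONCE(s->snd_nxt, snd_nxt_new);
	pthread_mutex_unlock(&s->owner_lock);
}

/* Lockless reader: READ_ONCE() guarantees a single, untorn load; the value
 * may be slightly stale and callers must tolerate that. */
static uint64_t peek_snd_nxt(struct tx_state *s)
{
	return READ_ONCE(s->snd_nxt);
}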

Comments

Mat Martineau Jan. 19, 2024, 1:03 a.m. UTC | #1
On Tue, 16 Jan 2024, Paolo Abeni wrote:

> The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> the msk socket lock protection, and are accessed lockless in a few spots.
>
> Always mark the write operations with WRITE_ONCE, read operations
> outside the lock with READ_ONCE and drop the annotation for read
> under such lock.
>
> To simplify the annotations move mptcp_pending_data_fin_ack() from
> __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> lock, where such call would belong.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/options.c  |  2 +-
> net/mptcp/protocol.c | 14 ++++++--------
> net/mptcp/protocol.h |  2 +-
> 3 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index cebbd0bc32aa..51b00d7e7c89 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> 		msk->wnd_end = new_wnd_end;
>
> 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> +	if (after64(msk->wnd_end, snd_nxt))

Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
the data_lock is acquired.

- Mat

Paolo Abeni Jan. 21, 2024, 9:47 p.m. UTC | #2
On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
> On Tue, 16 Jan 2024, Paolo Abeni wrote:
> 
> > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> > the msk socket lock protection, and are accessed lockless in a few spots.
> > 
> > Always mark the write operations with WRITE_ONCE, read operations
> > outside the lock with READ_ONCE and drop the annotation for read
> > under such lock.
> > 
> > To simplify the annotations move mptcp_pending_data_fin_ack() from
> > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> > lock, where such call would belong.
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > net/mptcp/options.c  |  2 +-
> > net/mptcp/protocol.c | 14 ++++++--------
> > net/mptcp/protocol.h |  2 +-
> > 3 files changed, 8 insertions(+), 10 deletions(-)
> > 
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index cebbd0bc32aa..51b00d7e7c89 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> > 		msk->wnd_end = new_wnd_end;
> > 
> > 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> > -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> > +	if (after64(msk->wnd_end, snd_nxt))
> 
> Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
> the data_lock is acquired.

You are right, READ_ONCE is required here. This is outside the msk
socket lock. I'll send a v2.

Thanks!

Paolo
Paolo Abeni Jan. 22, 2024, 3:07 p.m. UTC | #3
On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote:
> On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
> > On Tue, 16 Jan 2024, Paolo Abeni wrote:
> > 
> > > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> > > the msk socket lock protection, and are accessed lockless in a few spots.
> > > 
> > > Always mark the write operations with WRITE_ONCE, read operations
> > > outside the lock with READ_ONCE and drop the annotation for read
> > > under such lock.
> > > 
> > > To simplify the annotations move mptcp_pending_data_fin_ack() from
> > > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> > > lock, where such call would belong.
> > > 
> > > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > > ---
> > > net/mptcp/options.c  |  2 +-
> > > net/mptcp/protocol.c | 14 ++++++--------
> > > net/mptcp/protocol.h |  2 +-
> > > 3 files changed, 8 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > > index cebbd0bc32aa..51b00d7e7c89 100644
> > > --- a/net/mptcp/options.c
> > > +++ b/net/mptcp/options.c
> > > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> > > 		msk->wnd_end = new_wnd_end;
> > > 
> > > 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> > > -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> > > +	if (after64(msk->wnd_end, snd_nxt))
> > 
> > Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
> > the data_lock is acquired.
> 
> You are right, READ_ONCE is required here. This is outside the msk
> socket lock. I'll send a v2.

Oops, sorry, I was too hasty. After a more careful re-read, I think this
patch is correct.

We don't need to re-read snd_nxt inside the data_lock:
- the 'new' value would still _not_ be any more accurate, as the lock
protecting such a field is the msk _socket_ lock.
- we don't have any data dependency with other msk fields.

The READ_ONCE annotation is there just to make the fuzzer happy and
avoid data race reports; we don't need to issue the read operation
multiple times.

Please let me know if the above makes sense to you!

Cheers,

Paolo
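
To restate the argument as code, building on the illustrative shims and
struct tx_state sketched after the commit message above (names such as
data_lock, wnd_end and maybe_push() are hypothetical, not the MPTCP
implementation): the local is filled by a single READ_ONCE() before the
second lock is taken, and re-reading the field under that lock cannot make
it any more accurate, because that lock does not protect the field.

/* A second lock that does NOT own snd_nxt, analogous to the msk data lock
 * versus the msk socket lock in ack_update_msk(). */
static pthread_mutex_t data_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t wnd_end;		/* owned by data_lock in this sketch */

static void maybe_push(struct tx_state *s)
{
	(void)s;	/* would kick the TX machinery in the real code */
}

static void ack_update_sketch(struct tx_state *s, uint64_t new_wnd_end)
{
	/* One marked, lockless load into a local... */
	uint64_t snd_nxt = peek_snd_nxt(s);

	pthread_mutex_lock(&data_lock);
	if (new_wnd_end > wnd_end)
		wnd_end = new_wnd_end;

	/* ...reused here. Holding data_lock gives no extra guarantee about
	 * snd_nxt, so a second READ_ONCE() would buy nothing; at worst a
	 * stale snapshot triggers one unnecessary push check. */
	if (wnd_end > snd_nxt)
		maybe_push(s);
	pthread_mutex_unlock(&data_lock);
}
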
Mat Martineau Jan. 23, 2024, 1:21 a.m. UTC | #4
On Mon, 22 Jan 2024, Paolo Abeni wrote:

> On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote:
>> On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
>>> On Tue, 16 Jan 2024, Paolo Abeni wrote:
>>>
>>>> The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
>>>> the msk socket lock protection, and are accessed lockless in a few spots.
>>>>
>>>> Always mark the write operations with WRITE_ONCE, read operations
>>>> outside the lock with READ_ONCE and drop the annotation for read
>>>> under such lock.
>>>>
>>>> To simplify the annotations move mptcp_pending_data_fin_ack() from
>>>> __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
>>>> lock, where such call would belong.
>>>>
>>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>>> ---
>>>> net/mptcp/options.c  |  2 +-
>>>> net/mptcp/protocol.c | 14 ++++++--------
>>>> net/mptcp/protocol.h |  2 +-
>>>> 3 files changed, 8 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>>>> index cebbd0bc32aa..51b00d7e7c89 100644
>>>> --- a/net/mptcp/options.c
>>>> +++ b/net/mptcp/options.c
>>>> @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
>>>> 		msk->wnd_end = new_wnd_end;
>>>>
>>>> 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
>>>> -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
>>>> +	if (after64(msk->wnd_end, snd_nxt))
>>>
>>> Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before
>>> the data_lock is acquired.
>>
>> You are right, READ_ONCE is required here. This is outside the msk
>> socket lock. I'll send a v2.
>
> Oops, sorry I was too hasty. After a better re-read I think this patch
> is correct.
>
> We don't need to re-read snd_nxt inside the data_lock:
> - the 'new' value will still _not_ be any more accurate, as the lock
> protecting such lock is the msk _socket_ lock.
> - we don't have any data dependency with others msk fields
>
> The READ_ONCE annotation is there just to make the fuzzer happy and
> avoid reporting data races, we don't need to issue the read operation
> multiple time.
>
> Please let me know if the above makes sense to you!
>

Yeah, that makes sense. Also, the only (incredibly rare) consequence of a 
stale snd_nxt here could be an unnecessary call to __mptcp_check_push(), 
which is not a problem.

- Mat
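
One way to see why such a spurious check is harmless, in terms of the same
hypothetical sketch above (again not the MPTCP code): the triggered path
re-validates the real state under the lock that owns it before doing any
work, so a stale snapshot costs at most one wasted check.

/* In the sketch, the push path re-checks under the owning lock, so a
 * spurious maybe_push() caused by a stale snd_nxt snapshot is a no-op. */
static void push_work(struct tx_state *s)
{
	pthread_mutex_lock(&s->owner_lock);
	/* snd_nxt is owned by owner_lock (plain read); wnd_end is not
	 * (marked read). */
	if (READ_ONCE(wnd_end) > s->snd_nxt) {
		/* ... transmit more data ... */
	}
	pthread_mutex_unlock(&s->owner_lock);
}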

Patch

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index cebbd0bc32aa..51b00d7e7c89 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1061,7 +1061,7 @@  static void ack_update_msk(struct mptcp_sock *msk,
 		msk->wnd_end = new_wnd_end;
 
 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
-	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+	if (after64(msk->wnd_end, snd_nxt))
 		__mptcp_check_push(sk, ssk);
 
 	if (after64(new_snd_una, old_snd_una)) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9ad672a10c11..679d4576d2c1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1033,13 +1033,14 @@  static void __mptcp_clean_una(struct sock *sk)
 		msk->recovery = false;
 
 out:
-	if (snd_una == READ_ONCE(msk->snd_nxt) &&
-	    snd_una == READ_ONCE(msk->write_seq)) {
+	if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
 		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
 			mptcp_stop_rtx_timer(sk);
 	} else {
 		mptcp_reset_rtx_timer(sk);
 	}
+	if (mptcp_pending_data_fin_ack(sk))
+		mptcp_schedule_work(sk);
 }
 
 static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1499,7 +1500,7 @@  static void mptcp_update_post_push(struct mptcp_sock *msk,
 	 */
 	if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
 		msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
-		msk->snd_nxt = snd_nxt_new;
+		WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
 	}
 }
 
@@ -3210,8 +3211,8 @@  struct sock *mptcp_sk_clone_init(const struct sock *sk,
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
 		WRITE_ONCE(msk->csum_enabled, true);
 
-	msk->write_seq = subflow_req->idsn + 1;
-	msk->snd_nxt = msk->write_seq;
+	WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
@@ -3316,9 +3317,6 @@  void __mptcp_data_acked(struct sock *sk)
 		__mptcp_clean_una(sk);
 	else
 		__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
-	if (mptcp_pending_data_fin_ack(sk))
-		mptcp_schedule_work(sk);
 }
 
 void __mptcp_check_push(struct sock *sk, struct sock *ssk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3af85643328e..d05ec76dd7c2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -402,7 +402,7 @@  static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+	if (msk->snd_una == msk->snd_nxt)
 		return NULL;
 
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);