
[mptcp-next,v2,3/7] mptcp: fix potential wake-up event loss

Message ID 78924a15f3d86f989fa286f10ef88670b790eb89.1707739536.git.pabeni@redhat.com
State Superseded, archived
Delegated to: Mat Martineau
Series mptcp: implement TCP_NOTSENT_LOWAT support

Checks

Context Check Description
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 46 lines checked
matttbe/build success Build and static analysis OK
matttbe/KVM_Validation__normal success Success! ✅
matttbe/KVM_Validation__debug__except_selftest_mptcp_join_ success Success! ✅
matttbe/KVM_Validation__debug__only_selftest_mptcp_join_ success Success! ✅

Commit Message

Paolo Abeni Feb. 12, 2024, 3:18 p.m. UTC
After the blamed commit below, the send buffer auto-tuning can
happen after mptcp_propagate_sndbuf() completes - via the
delegated action infrastructure.

We must check for write space even after such a change, or we
risk missing the wake-up event.

Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.h | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)
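
To make the user-visible impact concrete: the wake-up the patch preserves is
what unblocks a sender waiting for POLLOUT/EPOLLOUT after filling the MPTCP
send buffer. A minimal userspace sketch of that waiting pattern (hypothetical
helper, not part of the patch; it assumes an already connected non-blocking
MPTCP socket fd):

#include <poll.h>

/* Block until the socket becomes writable again. If the kernel-side
 * write-space wake-up were lost, this poll() could stall until the
 * timeout even though the send buffer has since been enlarged.
 */
static int wait_writable(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret <= 0)
		return ret;	/* 0 on timeout, -1 on error */
	return (pfd.revents & POLLOUT) ? 1 : 0;
}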

Comments

Mat Martineau Feb. 15, 2024, 10:20 p.m. UTC | #1
On Mon, 12 Feb 2024, Paolo Abeni wrote:

> After the blamed commit below, the send buffer auto-tuning can
> happen after mptcp_propagate_sndbuf() completes - via the
> delegated action infrastructure.
>
> We must check for write space even after such a change, or we
> risk missing the wake-up event.
>
> Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.h | 27 ++++++++++++++++++++-------
> 1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 486fff865803..2e197262a42e 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -797,6 +797,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
> 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
> }
>
> +static inline void mptcp_write_space(struct sock *sk)
> +{
> +	if (sk_stream_is_writeable(sk)) {
> +		/* pairs with memory barrier in mptcp_poll */
> +		smp_mb();
> +		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
> +			sk_stream_write_space(sk);
> +	}
> +}
> +
> static inline void __mptcp_sync_sndbuf(struct sock *sk)
> {
> 	struct mptcp_subflow_context *subflow;
> @@ -815,6 +825,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
>
> 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
> 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
> +	mptcp_write_space(sk);
> }
>
> /* The called held both the msk socket and the subflow socket locks,
> @@ -845,14 +856,16 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
> 	local_bh_enable();
> }
>
> -static inline void mptcp_write_space(struct sock *sk)
> +static inline void __mptcp_sync_sndnxt(struct sock *sk, u64 new_snd_nxt)

Hi Paolo -

I think you missed my comment on this in v1, __mptcp_sync_sndnxt() isn't 
called anywhere.

- Mat

> {
> -	if (sk_stream_is_writeable(sk)) {
> -		/* pairs with memory barrier in mptcp_poll */
> -		smp_mb();
> -		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
> -			sk_stream_write_space(sk);
> -	}
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (!after64(new_snd_nxt, msk->snd_nxt))
> +		return;
> +
> +	msk->bytes_sent += new_snd_nxt - msk->snd_nxt;
> +	WRITE_ONCE(msk->snd_nxt, new_snd_nxt);
> +	mptcp_write_space(sk);
> }
>
> void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
> -- 
> 2.43.0
>
>
>
Paolo Abeni Feb. 16, 2024, 9:39 a.m. UTC | #2
On Thu, 2024-02-15 at 14:20 -0800, Mat Martineau wrote:
> On Mon, 12 Feb 2024, Paolo Abeni wrote:
> 
> > After the blamed commit below, the send buffer auto-tuning can
> > happen after mptcp_propagate_sndbuf() completes - via the
> > delegated action infrastructure.
> > 
> > We must check for write space even after such a change, or we
> > risk missing the wake-up event.
> > 
> > Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > net/mptcp/protocol.h | 27 ++++++++++++++++++++-------
> > 1 file changed, 20 insertions(+), 7 deletions(-)
> > 
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 486fff865803..2e197262a42e 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -797,6 +797,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
> > 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
> > }
> > 
> > +static inline void mptcp_write_space(struct sock *sk)
> > +{
> > +	if (sk_stream_is_writeable(sk)) {
> > +		/* pairs with memory barrier in mptcp_poll */
> > +		smp_mb();
> > +		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
> > +			sk_stream_write_space(sk);
> > +	}
> > +}
> > +
> > static inline void __mptcp_sync_sndbuf(struct sock *sk)
> > {
> > 	struct mptcp_subflow_context *subflow;
> > @@ -815,6 +825,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
> > 
> > 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
> > 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
> > +	mptcp_write_space(sk);
> > }
> > 
> > /* The called held both the msk socket and the subflow socket locks,
> > @@ -845,14 +856,16 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
> > 	local_bh_enable();
> > }
> > 
> > -static inline void mptcp_write_space(struct sock *sk)
> > +static inline void __mptcp_sync_sndnxt(struct sock *sk, u64 new_snd_nxt)
> 
> Hi Paolo -
> 
> I think you missed my comment on this in v1, __mptcp_sync_sndnxt() isn't 
> called anywhere.

Indeed! Sorry for missing it, and thank you for persisting. Yes, I must
drop the function definition. FTR, it's a leftover from a previous
implementation attempt.

Thanks!

Paolo

Patch

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 486fff865803..2e197262a42e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -797,6 +797,16 @@  static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 }
 
+static inline void mptcp_write_space(struct sock *sk)
+{
+	if (sk_stream_is_writeable(sk)) {
+		/* pairs with memory barrier in mptcp_poll */
+		smp_mb();
+		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+			sk_stream_write_space(sk);
+	}
+}
+
 static inline void __mptcp_sync_sndbuf(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -815,6 +825,7 @@  static inline void __mptcp_sync_sndbuf(struct sock *sk)
 
 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+	mptcp_write_space(sk);
 }
 
 /* The called held both the msk socket and the subflow socket locks,
@@ -845,14 +856,16 @@  static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
 	local_bh_enable();
 }
 
-static inline void mptcp_write_space(struct sock *sk)
+static inline void __mptcp_sync_sndnxt(struct sock *sk, u64 new_snd_nxt)
 {
-	if (sk_stream_is_writeable(sk)) {
-		/* pairs with memory barrier in mptcp_poll */
-		smp_mb();
-		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
-			sk_stream_write_space(sk);
-	}
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (!after64(new_snd_nxt, msk->snd_nxt))
+		return;
+
+	msk->bytes_sent += new_snd_nxt - msk->snd_nxt;
+	WRITE_ONCE(msk->snd_nxt, new_snd_nxt);
+	mptcp_write_space(sk);
 }
 
 void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
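
A note on the barrier comment above ("pairs with memory barrier in
mptcp_poll"): this is the usual lost-wakeup handshake. The sleeper publishes
MPTCP_NOSPACE, executes a full barrier, then re-checks write space before
sleeping; the waker checks write space, executes a full barrier, then
test-and-clears the bit and issues the wake-up. A generic sketch of the two
sides (the waiter side here is illustrative only, not the actual mptcp_poll()
code):

/* Sketch only: waiter_side() models the poll path before sleeping,
 * waker_side() mirrors mptcp_write_space() from the patch above.
 */
static bool waiter_side(struct sock *sk)
{
	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);	/* publish "I may sleep" */
	smp_mb__after_atomic();				/* pairs with smp_mb() below */
	return sk_stream_is_writeable(sk);		/* re-check before sleeping */
}

static void waker_side(struct sock *sk)
{
	if (sk_stream_is_writeable(sk)) {
		smp_mb();	/* order the space update vs. reading the flag */
		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
			sk_stream_write_space(sk);	/* wake the waiter */
	}
}

Either the waiter sees the new space on its re-check, or the waker sees
MPTCP_NOSPACE and wakes it; the barriers rule out the interleaving where both
sides miss each other.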