diff mbox series

[mptcp-next,3/7] mptcp: fix potential wake-up event loss

Message ID 27f976d6a8f1a88ed5ad3822af43d0fc648ab9f8.1705836321.git.pabeni@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Mat Martineau
Headers show
Series mptcp: implement TCP_NOTSENT_LOWAT support | expand

Commit Message

Paolo Abeni Jan. 22, 2024, 3:08 p.m. UTC
After the blamed commit below, the send buffer auto-tuning can
happen after mptcp_propagate_sndbuf() completes - via
the delegated action infrastructure.

We must check for write space even after such a change, or we risk
missing the wake-up event.

Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.h | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

Comments

Mat Martineau Feb. 8, 2024, 1:57 a.m. UTC | #1
On Mon, 22 Jan 2024, Paolo Abeni wrote:

> After the blamed commit below, the send buffer auto-tuning can
> happen after that the mptcp_propagate_sndbuf() completes - via
> the delegated action infrastructure.
>
> We must check for write space even after such change or we risk
> missing the wake-up event.
>
> Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")

Hi Paolo -

These first three patches have "Fixes:" tags, what do you think about 
applying them to mptcp-net instead of mptcp-next?

> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.h | 27 ++++++++++++++++++++-------
> 1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index d05ec76dd7c2..bbc490f36273 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -797,6 +797,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
> 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
> }
>
> +static inline void mptcp_write_space(struct sock *sk)
> +{
> +	if (sk_stream_is_writeable(sk)) {
> +		/* pairs with memory barrier in mptcp_poll */
> +		smp_mb();
> +		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
> +			sk_stream_write_space(sk);
> +	}
> +}
> +
> static inline void __mptcp_sync_sndbuf(struct sock *sk)
> {
> 	struct mptcp_subflow_context *subflow;
> @@ -815,6 +825,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
>
> 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
> 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
> +	mptcp_write_space(sk);
> }
>
> /* The called held both the msk socket and the subflow socket locks,
> @@ -845,14 +856,16 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
> 	local_bh_enable();
> }
>
> -static inline void mptcp_write_space(struct sock *sk)
> +static inline void __mptcp_sync_sndnxt(struct sock *sk, u64 new_snd_nxt)

This is never called - extra code from an experiment or did some delegated 
action code not get added?

- Mat

> {
> -	if (sk_stream_is_writeable(sk)) {
> -		/* pairs with memory barrier in mptcp_poll */
> -		smp_mb();
> -		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
> -			sk_stream_write_space(sk);
> -	}
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (!after64(new_snd_nxt, msk->snd_nxt))
> +		return;
> +
> +	msk->bytes_sent += new_snd_nxt - msk->snd_nxt;
> +	WRITE_ONCE(msk->snd_nxt, new_snd_nxt);
> +	mptcp_write_space(sk);
> }
>
> void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
> -- 
> 2.43.0
>
>
>
Paolo Abeni Feb. 12, 2024, 11:09 a.m. UTC | #2
On Wed, 2024-02-07 at 17:57 -0800, Mat Martineau wrote:
> On Mon, 22 Jan 2024, Paolo Abeni wrote:
> 
> > After the blamed commit below, the send buffer auto-tuning can
> > happen after that the mptcp_propagate_sndbuf() completes - via
> > the delegated action infrastructure.
> > 
> > We must check for write space even after such change or we risk
> > missing the wake-up event.
> > 
> > Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning")
> 
> Hi Paolo -
> 
> These first three patches have "Fixes:" tags, what do you think about 
> applying them to mptcp-net instead of mptcp-next?


I'm fine with that. I sent them together because they are needed to allow
TCP_NOTSENT_LOWAT to work in a reliable way.

Cheers,

Paolo
diff mbox series

Patch

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d05ec76dd7c2..bbc490f36273 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -797,6 +797,16 @@  static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 }
 
+static inline void mptcp_write_space(struct sock *sk)
+{
+	if (sk_stream_is_writeable(sk)) {
+		/* pairs with memory barrier in mptcp_poll */
+		smp_mb();
+		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+			sk_stream_write_space(sk);
+	}
+}
+
 static inline void __mptcp_sync_sndbuf(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -815,6 +825,7 @@  static inline void __mptcp_sync_sndbuf(struct sock *sk)
 
 	/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
 	WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+	mptcp_write_space(sk);
 }
 
 /* The called held both the msk socket and the subflow socket locks,
@@ -845,14 +856,16 @@  static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
 	local_bh_enable();
 }
 
-static inline void mptcp_write_space(struct sock *sk)
+static inline void __mptcp_sync_sndnxt(struct sock *sk, u64 new_snd_nxt)
 {
-	if (sk_stream_is_writeable(sk)) {
-		/* pairs with memory barrier in mptcp_poll */
-		smp_mb();
-		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
-			sk_stream_write_space(sk);
-	}
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (!after64(new_snd_nxt, msk->snd_nxt))
+		return;
+
+	msk->bytes_sent += new_snd_nxt - msk->snd_nxt;
+	WRITE_ONCE(msk->snd_nxt, new_snd_nxt);
+	mptcp_write_space(sk);
 }
 
 void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);