diff mbox series

[mptcp-net] mptcp: fix sk_forward_memory corruption under memory pressure

Message ID b84d11a2f4c33e69637567e64138a453611064dc.1620986525.git.pabeni@redhat.com (mailing list archive)
State Accepted, archived
Commit e31f8077084df8ba1008bed6b03bbbb546f7b9f6
Delegated to: Matthieu Baerts
Headers show
Series [mptcp-net] mptcp: fix sk_forward_memory corruption under memory pressure | expand

Commit Message

Paolo Abeni May 14, 2021, 10:05 a.m. UTC
MPTCP sk_forward_memory handling is a bit special, as such field
is protected by the msk socket spin_lock, instead of the plain
socket lock.

Currently we have a code path updating such field without handling
the relevant lock:

__mptcp_retrans() -> __mptcp_clean_una_wakeup() -> __mptcp_update_wmem()

We can hit the above only under memory pressure. When that happen,
forward memory accounting could be corrupted, as reported by Mat.

Address the issue creating a new variant of __mptcp_clean_una_wakeup()
which handle fwd memory updates with the proper care and invoking
such new helper in the relevant code path.

Reported-by: 'Matthieu Baerts <matthieu.baerts@tessares.net>'
Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Comments

Mat Martineau May 14, 2021, 9:44 p.m. UTC | #1
On Fri, 14 May 2021, Paolo Abeni wrote:

> MPTCP sk_forward_memory handling is a bit special, as such field
> is protected by the msk socket spin_lock, instead of the plain
> socket lock.
>
> Currently we have a code path updating such field without handling
> the relevant lock:
>
> __mptcp_retrans() -> __mptcp_clean_una_wakeup() -> __mptcp_update_wmem()
>
> We can hit the above only under memory pressure. When that happen,
> forward memory accounting could be corrupted, as reported by Mat.
>

Me? :)

> Address the issue creating a new variant of __mptcp_clean_una_wakeup()
> which handle fwd memory updates with the proper care and invoking
> such new helper in the relevant code path.
>
> Reported-by: 'Matthieu Baerts <matthieu.baerts@tessares.net>'
> Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions")

and
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/172

correct?

> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++--------
> 1 file changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2d21a4793d9d..1d5d451e9211 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
>
> +#ifdef CONFIG_LOCKDEP
> +	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
> +#endif
> +
> 	if (!msk->wmem_reserved)
> 		return;
>
> @@ -1027,7 +1031,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
> 	put_page(dfrag->page);
> }
>
> -static void __mptcp_clean_una(struct sock *sk)
> +static bool __mptcp_do_clean_una(struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct mptcp_data_frag *dtmp, *dfrag;
> @@ -1068,12 +1072,6 @@ static void __mptcp_clean_una(struct sock *sk)
> 	}
>
> out:
> -	if (cleaned) {
> -		if (tcp_under_memory_pressure(sk)) {
> -			__mptcp_update_wmem(sk);
> -			sk_mem_reclaim_partial(sk);
> -		}
> -	}
>
> 	if (snd_una == READ_ONCE(msk->snd_nxt)) {
> 		if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
> @@ -1081,6 +1079,15 @@ static void __mptcp_clean_una(struct sock *sk)
> 	} else {
> 		mptcp_reset_timer(sk);
> 	}
> +	return cleaned;
> +}
> +
> +static void __mptcp_clean_una(struct sock *sk)
> +{
> +	if (__mptcp_do_clean_una(sk) && tcp_under_memory_pressure(sk)) {
> +		__mptcp_update_wmem(sk);
> +		sk_mem_reclaim_partial(sk);
> +	}
> }
>
> static void __mptcp_clean_una_wakeup(struct sock *sk)
> @@ -1089,6 +1096,19 @@ static void __mptcp_clean_una_wakeup(struct sock *sk)
> 	mptcp_write_space(sk);
> }
>
> +/* variant __mptcp_clean_una_wakeup() for caller owning the msk socket lock,
> + * but not the msk_data_lock/msk socket spin lock
> + */
> +static void mptcp_clean_una_wakeup(struct sock *sk)
> +{
> +#ifdef CONFIG_LOCKDEP
> +	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock));
> +#endif
> +	if (__mptcp_do_clean_una(sk) && tcp_under_memory_pressure(sk))
> +		mptcp_mem_reclaim_partial(sk);
> +	mptcp_write_space(sk);
> +}
> +
> static void mptcp_enter_memory_pressure(struct sock *sk)
> {
> 	struct mptcp_subflow_context *subflow;
> @@ -2299,7 +2319,7 @@ static void __mptcp_retrans(struct sock *sk)
> 	struct sock *ssk;
> 	int ret;
>
> -	__mptcp_clean_una_wakeup(sk);
> +	mptcp_clean_una_wakeup(sk);
> 	dfrag = mptcp_rtx_head(sk);
> 	if (!dfrag) {
> 		if (mptcp_data_fin_enabled(msk)) {
> -- 
> 2.26.3

Looks good to me. Thanks Paolo.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

--
Mat Martineau
Intel
Matthieu Baerts May 17, 2021, 7:15 p.m. UTC | #2
Hi Paolo, Mat,

On 14/05/2021 12:05, Paolo Abeni wrote:
> MPTCP sk_forward_memory handling is a bit special, as such field
> is protected by the msk socket spin_lock, instead of the plain
> socket lock.
> 
> Currently we have a code path updating such field without handling
> the relevant lock:
> 
> __mptcp_retrans() -> __mptcp_clean_una_wakeup() -> __mptcp_update_wmem()
> 
> We can hit the above only under memory pressure. When that happen,
> forward memory accounting could be corrupted, as reported by Mat.
> 
> Address the issue creating a new variant of __mptcp_clean_una_wakeup()
> which handle fwd memory updates with the proper care and invoking
> such new helper in the relevant code path.
> 
> Reported-by: 'Matthieu Baerts <matthieu.baerts@tessares.net>'
> Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Thank you for the patch and the review!

Now in our tree with Mat's RvB tag, a small checkpatch fix, my Tested-by
tag, s/Mat/Matt/ and the Closes tag.

- e31f8077084d: mptcp: fix sk_forward_memory corruption under memory
pressure
- Results: 36a7db06660a..6273c44bc200

Builds and tests are now in progress:

https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20210517T191245
https://github.com/multipath-tcp/mptcp_net-next/actions/workflows/build-validation.yml?query=branch:export/20210517T191245

Cheers,
Matt
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d21a4793d9d..1d5d451e9211 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -947,6 +947,10 @@  static void __mptcp_update_wmem(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
 	if (!msk->wmem_reserved)
 		return;
 
@@ -1027,7 +1031,7 @@  static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
 	put_page(dfrag->page);
 }
 
-static void __mptcp_clean_una(struct sock *sk)
+static bool __mptcp_do_clean_una(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_data_frag *dtmp, *dfrag;
@@ -1068,12 +1072,6 @@  static void __mptcp_clean_una(struct sock *sk)
 	}
 
 out:
-	if (cleaned) {
-		if (tcp_under_memory_pressure(sk)) {
-			__mptcp_update_wmem(sk);
-			sk_mem_reclaim_partial(sk);
-		}
-	}
 
 	if (snd_una == READ_ONCE(msk->snd_nxt)) {
 		if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
@@ -1081,6 +1079,15 @@  static void __mptcp_clean_una(struct sock *sk)
 	} else {
 		mptcp_reset_timer(sk);
 	}
+	return cleaned;
+}
+
+static void __mptcp_clean_una(struct sock *sk)
+{
+	if (__mptcp_do_clean_una(sk) && tcp_under_memory_pressure(sk)) {
+		__mptcp_update_wmem(sk);
+		sk_mem_reclaim_partial(sk);
+	}
 }
 
 static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1089,6 +1096,19 @@  static void __mptcp_clean_una_wakeup(struct sock *sk)
 	mptcp_write_space(sk);
 }
 
+/* variant __mptcp_clean_una_wakeup() for caller owning the msk socket lock,
+ * but not the msk_data_lock/msk socket spin lock
+ */
+static void mptcp_clean_una_wakeup(struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock));
+#endif
+	if (__mptcp_do_clean_una(sk) && tcp_under_memory_pressure(sk))
+		mptcp_mem_reclaim_partial(sk);
+	mptcp_write_space(sk);
+}
+
 static void mptcp_enter_memory_pressure(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -2299,7 +2319,7 @@  static void __mptcp_retrans(struct sock *sk)
 	struct sock *ssk;
 	int ret;
 
-	__mptcp_clean_una_wakeup(sk);
+	mptcp_clean_una_wakeup(sk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {