diff mbox series

[v2,mptcp-net,2/2] mptcp: do not reset MP_CAPABLE subflow on mapping errors

Message ID 99f56765d4939e6f21c3950ed2ee3b54369e6d5b.1621270518.git.pabeni@redhat.com (mailing list archive)
State Superseded, archived
Headers show
Series [v2,mptcp-net,1/2] mptcp: always parse mptcp options for MPC reqsk | expand

Commit Message

Paolo Abeni May 17, 2021, 4:56 p.m. UTC
When some mapping-related errors occur, we close the main
MPC subflow with a RST. We should instead fall back gracefully
to TCP, and do the reset only for MPJ subflows.

Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/subflow.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

Comments

Mat Martineau May 18, 2021, 12:16 a.m. UTC | #1
On Mon, 17 May 2021, Paolo Abeni wrote:

> When some mapping related errors occours we close the main
> MPC subflow with a RST. We should instead fallback gracefully
> to TCP, and do the reset only for MPJ subflows.
>
> Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/subflow.c | 39 +++++++++++++++++++--------------------
> 1 file changed, 19 insertions(+), 20 deletions(-)
>
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 278986585088..9befe9fe7bca 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -1110,10 +1110,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
>
> 		status = get_mapping_status(ssk, msk);
> 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
> -		if (unlikely(status == MAPPING_INVALID)) {
> -			ssk->sk_err = EBADMSG;
> -			goto fatal;
> -		}
> +		if (unlikely(status == MAPPING_INVALID))
> +			goto fallback;
> +

There are a bunch of other ways to get MAPPING_INVALID during the life of 
a connection, including when there are multiple subflows active and 
fallback is not a valid option. Can the new fallback condition be more 
targeted to this "out of order / packet loss at connection time" issue so 
truly fatal MAPPING_INVALID cases still reset the connection?

-Mat


> 		if (unlikely(status == MAPPING_DUMMY))
> 			goto fallback;
>
> @@ -1128,10 +1127,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
> 		 * MP_CAPABLE-based mapping
> 		 */
> 		if (unlikely(!READ_ONCE(msk->can_ack))) {
> -			if (!subflow->mpc_map) {
> -				ssk->sk_err = EBADMSG;
> -				goto fatal;
> -			}
> +			if (!subflow->mpc_map)
> +				goto fallback;
> 			WRITE_ONCE(msk->remote_key, subflow->remote_key);
> 			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
> 			WRITE_ONCE(msk->can_ack, true);
> @@ -1160,19 +1157,21 @@ static bool subflow_check_data_avail(struct sock *ssk)
> 	subflow_sched_work_if_closed(msk, ssk);
> 	return false;
>
> -fatal:
> -	/* fatal protocol error, close the socket */
> -	/* This barrier is coupled with smp_rmb() in tcp_poll() */
> -	smp_wmb();
> -	ssk->sk_error_report(ssk);
> -	tcp_set_state(ssk, TCP_CLOSE);
> -	subflow->reset_transient = 0;
> -	subflow->reset_reason = MPTCP_RST_EMPTCP;
> -	tcp_send_active_reset(ssk, GFP_ATOMIC);
> -	subflow->data_avail = 0;
> -	return false;
> -
> fallback:
> +	if (subflow->mp_join) {
> +		/* fatal protocol error, close the socket */
> +		/* This barrier is coupled with smp_rmb() in tcp_poll() */
> +		smp_wmb();
> +		ssk->sk_err = EBADMSG;
> +		ssk->sk_error_report(ssk);
> +		tcp_set_state(ssk, TCP_CLOSE);
> +		subflow->reset_transient = 0;
> +		subflow->reset_reason = MPTCP_RST_EMPTCP;
> +		tcp_send_active_reset(ssk, GFP_ATOMIC);
> +		subflow->data_avail = 0;
> +		return false;
> +	}
> +
> 	__mptcp_do_fallback(msk);
> 	skb = skb_peek(&ssk->sk_receive_queue);
> 	subflow->map_valid = 1;
> -- 
> 2.26.3
>
>
>

--
Mat Martineau
Intel
Paolo Abeni May 18, 2021, 2:17 p.m. UTC | #2
On Mon, 2021-05-17 at 17:16 -0700, Mat Martineau wrote:
> On Mon, 17 May 2021, Paolo Abeni wrote:
> 
> > When some mapping related errors occours we close the main
> > MPC subflow with a RST. We should instead fallback gracefully
> > to TCP, and do the reset only for MPJ subflows.
> > 
> > Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
> > Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > net/mptcp/subflow.c | 39 +++++++++++++++++++--------------------
> > 1 file changed, 19 insertions(+), 20 deletions(-)
> > 
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 278986585088..9befe9fe7bca 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -1110,10 +1110,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
> > 
> > 		status = get_mapping_status(ssk, msk);
> > 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
> > -		if (unlikely(status == MAPPING_INVALID)) {
> > -			ssk->sk_err = EBADMSG;
> > -			goto fatal;
> > -		}
> > +		if (unlikely(status == MAPPING_INVALID))
> > +			goto fallback;
> > +
> 
> There are a bunch of other ways to get MAPPING_INVALID during the life of 
> a connection, including when there are multiple subflows active and 
> fallback is not a valid option. Can the new fallback condition be more 
> targeted to this "out of order / packet loss at connection time" issue so 
> truly fatal MAPPING_INVALID cases still reset the connection?

What should we do if we get a MAPPING_INVALID and we have a single
(MPC) subflow? I could not find any specific reference in the RFC. I
think it's roughly the same as 'no mapping' at all: if we can fall back,
we do fall back; otherwise we reset.

Note that with this patch, the code under the 'fallback' label checks
whether a reset is needed or not. The current patch does a reset only if
the subflow is an MP_JOIN one, but that condition could additionally be
extended to 'msk has multiple subflows' - even if the latter looks like
a net-next patch.

WDYT?

Thanks!

Paolo
Mat Martineau May 18, 2021, 5:28 p.m. UTC | #3
On Tue, 18 May 2021, Paolo Abeni wrote:

> On Mon, 2021-05-17 at 17:16 -0700, Mat Martineau wrote:
>> On Mon, 17 May 2021, Paolo Abeni wrote:
>>
>>> When some mapping related errors occours we close the main
>>> MPC subflow with a RST. We should instead fallback gracefully
>>> to TCP, and do the reset only for MPJ subflows.
>>>
>>> Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
>>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192
>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>> ---
>>> net/mptcp/subflow.c | 39 +++++++++++++++++++--------------------
>>> 1 file changed, 19 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
>>> index 278986585088..9befe9fe7bca 100644
>>> --- a/net/mptcp/subflow.c
>>> +++ b/net/mptcp/subflow.c
>>> @@ -1110,10 +1110,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
>>>
>>> 		status = get_mapping_status(ssk, msk);
>>> 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
>>> -		if (unlikely(status == MAPPING_INVALID)) {
>>> -			ssk->sk_err = EBADMSG;
>>> -			goto fatal;
>>> -		}
>>> +		if (unlikely(status == MAPPING_INVALID))
>>> +			goto fallback;
>>> +
>>
>> There are a bunch of other ways to get MAPPING_INVALID during the life of
>> a connection, including when there are multiple subflows active and
>> fallback is not a valid option. Can the new fallback condition be more
>> targeted to this "out of order / packet loss at connection time" issue so
>> truly fatal MAPPING_INVALID cases still reset the connection?
>
> What we should do if we get a MAPPING_INVALID and we have a single
> (MPC) subflow? I could not find any specific reference in the RFC. I
> think it's roughly the same as 'no mapping' at all: if we can fallback
> we do fallback, otherwise we reset.

Looking at section 3.7, there aren't many cases where fallback is an 
option during operation (after the initial data has been successfully 
acked in each direction).

"If a subflow breaks during operation ... then once this is detected ...
the subflow SHOULD be treated as broken and closed with a RST, since no 
data can be delivered to the application layer and no fallback signal
can be reliably sent."

In general, it looks like a bad mapping should reset the *subflow* (rather 
than the whole MPTCP connection) - what I said above was unclear about 
what exactly should be reset.

It seems like the only way to fall back the whole connection later in its 
life involves checksum failure. In that case the RFC describes the 
conditions where an "infinite mapping" can be used for fallback - but 
that's different from the beginning-of-connection fallback code we have 
today.

>
> Note that with this patch, under the 'fallback' label, checks if a
> reset is needed or not. The current patch does a reset only if the
> subflow is an MP_JOIN one, but that condition could be additionally
> extended to 'msk has multiple subflows' - even if the latter looks like
> a net-next patch.
>
> WDYT?

As long as the subflow is reset when the mapping failed, that's fine. It 
doesn't seem like there are conditions during operation (long after 
connection time) where it's correct to do fallback without an infinite 
mapping.

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 278986585088..9befe9fe7bca 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1110,10 +1110,9 @@  static bool subflow_check_data_avail(struct sock *ssk)
 
 		status = get_mapping_status(ssk, msk);
 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
-		if (unlikely(status == MAPPING_INVALID)) {
-			ssk->sk_err = EBADMSG;
-			goto fatal;
-		}
+		if (unlikely(status == MAPPING_INVALID))
+			goto fallback;
+
 		if (unlikely(status == MAPPING_DUMMY))
 			goto fallback;
 
@@ -1128,10 +1127,8 @@  static bool subflow_check_data_avail(struct sock *ssk)
 		 * MP_CAPABLE-based mapping
 		 */
 		if (unlikely(!READ_ONCE(msk->can_ack))) {
-			if (!subflow->mpc_map) {
-				ssk->sk_err = EBADMSG;
-				goto fatal;
-			}
+			if (!subflow->mpc_map)
+				goto fallback;
 			WRITE_ONCE(msk->remote_key, subflow->remote_key);
 			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
 			WRITE_ONCE(msk->can_ack, true);
@@ -1160,19 +1157,21 @@  static bool subflow_check_data_avail(struct sock *ssk)
 	subflow_sched_work_if_closed(msk, ssk);
 	return false;
 
-fatal:
-	/* fatal protocol error, close the socket */
-	/* This barrier is coupled with smp_rmb() in tcp_poll() */
-	smp_wmb();
-	ssk->sk_error_report(ssk);
-	tcp_set_state(ssk, TCP_CLOSE);
-	subflow->reset_transient = 0;
-	subflow->reset_reason = MPTCP_RST_EMPTCP;
-	tcp_send_active_reset(ssk, GFP_ATOMIC);
-	subflow->data_avail = 0;
-	return false;
-
 fallback:
+	if (subflow->mp_join) {
+		/* fatal protocol error, close the socket */
+		/* This barrier is coupled with smp_rmb() in tcp_poll() */
+		smp_wmb();
+		ssk->sk_err = EBADMSG;
+		ssk->sk_error_report(ssk);
+		tcp_set_state(ssk, TCP_CLOSE);
+		subflow->reset_transient = 0;
+		subflow->reset_reason = MPTCP_RST_EMPTCP;
+		tcp_send_active_reset(ssk, GFP_ATOMIC);
+		subflow->data_avail = 0;
+		return false;
+	}
+
 	__mptcp_do_fallback(msk);
 	skb = skb_peek(&ssk->sk_receive_queue);
 	subflow->map_valid = 1;