
[v6,mptcp-next,3/5] mptcp: send out MP_FAIL when data checksum fails

Message ID 19469d95c33169d6e4dd553394ab4466756ff001.1627464017.git.geliangtang@xiaomi.com (mailing list archive)
State Superseded, archived
Series MP_FAIL support

Commit Message

Geliang Tang July 28, 2021, 9:35 a.m. UTC
From: Geliang Tang <geliangtang@xiaomi.com>

When a bad checksum is detected, set the send_mp_fail flag to send out
the MP_FAIL option.

Add a new helper, mptcp_has_another_subflow(), to check whether the MPTCP
connection has any subflow other than the given one.

When multiple subflows are in use, close the affected subflow with a RST
that includes an MP_FAIL option and discard the data with the bad
checksum.

Set the sk_state of the subflow socket to TCP_CLOSE, so that
subflow_sched_work_if_closed() sets the MPTCP_WORK_CLOSE_SUBFLOW flag and
the subflow gets closed.
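
For reference, a rough paraphrase of the existing helper in
net/mptcp/subflow.c (not part of this patch) that performs that close:

	static void subflow_sched_work_if_closed(struct mptcp_sock *msk,
						 struct sock *ssk)
	{
		struct sock *sk = (struct sock *)msk;

		if (likely(ssk->sk_state != TCP_CLOSE))
			return;

		/* Once the receive queue is drained, let the msk worker
		 * dispose of the closed subflow.
		 */
		if (skb_queue_empty(&ssk->sk_receive_queue) &&
		    !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
			mptcp_schedule_work(sk);
	}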

When a single subflow is in use, send back an MP_FAIL option on the
subflow-level ACK, and the receiver of this MP_FAIL responds with an
MP_FAIL in the reverse direction.

Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
---
 net/mptcp/pm.c       | 14 ++++++++++++++
 net/mptcp/protocol.h | 14 ++++++++++++++
 net/mptcp/subflow.c  | 17 +++++++++++++++++
 3 files changed, 45 insertions(+)

Comments

Mat Martineau July 28, 2021, 11:31 p.m. UTC | #1
On Wed, 28 Jul 2021, Geliang Tang wrote:

> From: Geliang Tang <geliangtang@xiaomi.com>
>
> When a bad checksum is detected, set the send_mp_fail flag to send out
> the MP_FAIL option.
>
> Add a new helper, mptcp_has_another_subflow(), to check whether the MPTCP
> connection has any subflow other than the given one.
>
> When multiple subflows are in use, close the affected subflow with a RST
> that includes an MP_FAIL option and discard the data with the bad
> checksum.
>

Thanks for the test code! I do see in wireshark that the multiple subflow 
case sends a TCP RST with both MP_FAIL and MP_TCPRST options when the 
checksum fails on one subflow.

> Set the sk_state of the subflow socket to TCP_CLOSE, so that
> subflow_sched_work_if_closed() sets the MPTCP_WORK_CLOSE_SUBFLOW flag and
> the subflow gets closed.
>
> When a single subflow is in use, send back an MP_FAIL option on the
> subflow-level ACK, and the receiver of this MP_FAIL responds with an
> MP_FAIL in the reverse direction.
>

The single subflow case has some unexpected behavior:

1. The checksum failure is detected and a packet is sent with MP_FAIL. 
However, the packet also carries a data payload and no DSS option.

2. The peer receives the MP_FAIL and echoes it back, but it sends two TCP 
RST + MP_FAIL packets back-to-back.

I'll upload a pcap to 
https://github.com/multipath-tcp/mptcp_net-next/issues/52

I think the right temporary behavior (before implementing infinite 
mappings) for single subflow checksum failure is to do what the RFC says 
for non-contiguous data: "In the rare case that the data is not contiguous 
(which could happen when there is only one subflow but it is 
retransmitting data from a subflow that has recently been uncleanly 
closed), the receiver MUST close the subflow with a RST with MP_FAIL." So, 
in step 1 above the peer that detected the bad checksum would still send 
MP_FAIL but with the RST flag. And then the echo would not be needed 
because the path would already be disconnected by the RST.
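
Roughly, that would mean treating the single-subflow case like the
multi-subflow one in the fallback branch of subflow_check_data_avail(),
assuming the option-writing path already attaches MP_FAIL whenever
send_mp_fail is set. Untested sketch, just to illustrate the idea:

	if (subflow->send_mp_fail) {
		/* Always close the affected subflow with RST + MP_FAIL,
		 * whether or not other subflows exist, so no echo is
		 * needed. Infinite mapping support would replace this
		 * for the single-subflow case later.
		 */
		ssk->sk_err = EBADMSG;
		tcp_set_state(ssk, TCP_CLOSE);
		subflow->reset_transient = 0;
		subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
		tcp_send_active_reset(ssk, GFP_ATOMIC);
		while ((skb = skb_peek(&ssk->sk_receive_queue)))
			sk_eat_skb(ssk, skb);
		WRITE_ONCE(subflow->data_avail, 0);
		return true;
	}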

What do you think?


Mat


> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
> ---
> net/mptcp/pm.c       | 14 ++++++++++++++
> net/mptcp/protocol.h | 14 ++++++++++++++
> net/mptcp/subflow.c  | 17 +++++++++++++++++
> 3 files changed, 45 insertions(+)
>
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 6ab386ff3294..c2df5cc28ba1 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -251,7 +251,21 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
>
> void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
> {
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +
> 	pr_debug("fail_seq=%llu", fail_seq);
> +
> +	if (!mptcp_has_another_subflow(sk)) {
> +		if (!subflow->mp_fail_expect_echo) {
> +			subflow->send_mp_fail = 1;
> +		} else {
> +			subflow->mp_fail_expect_echo = 0;
> +			/* TODO the single-subflow case is temporarily
> +			 * handled by reset.
> +			 */
> +			mptcp_subflow_reset(sk);
> +		}
> +	}
> }
>
> /* path manager helpers */
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 09d0e9406ea9..c46011318f65 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -434,6 +434,7 @@ struct mptcp_subflow_context {
> 		backup : 1,
> 		send_mp_prio : 1,
> 		send_mp_fail : 1,
> +		mp_fail_expect_echo : 1,
> 		rx_eof : 1,
> 		can_ack : 1,        /* only after processing the remote a key */
> 		disposable : 1,	    /* ctx can be free at ulp release time */
> @@ -615,6 +616,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
> 	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
> }
>
> +static inline bool mptcp_has_another_subflow(struct sock *ssk)
> +{
> +	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
> +	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> +
> +	mptcp_for_each_subflow(msk, tmp) {
> +		if (tmp != subflow)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> void __init mptcp_proto_init(void);
> #if IS_ENABLED(CONFIG_MPTCP_IPV6)
> int __init mptcp_proto_v6_init(void);
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 1151926d335b..a69839520472 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -910,6 +910,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
> 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
> 	if (unlikely(csum_fold(csum))) {
> 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
> +		subflow->send_mp_fail = 1;
> 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
> 	}
>
> @@ -1157,6 +1158,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
>
> fallback:
> 	/* RFC 8684 section 3.7. */
> +	if (subflow->send_mp_fail) {
> +		if (mptcp_has_another_subflow(ssk)) {
> +			ssk->sk_err = EBADMSG;
> +			tcp_set_state(ssk, TCP_CLOSE);
> +			subflow->reset_transient = 0;
> +			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
> +			tcp_send_active_reset(ssk, GFP_ATOMIC);
> +			while ((skb = skb_peek(&ssk->sk_receive_queue)))
> +				sk_eat_skb(ssk, skb);
> +		} else {
> +			subflow->mp_fail_expect_echo = 1;
> +		}
> +		WRITE_ONCE(subflow->data_avail, 0);
> +		return true;
> +	}
> +
> 	if (subflow->mp_join || subflow->fully_established) {
> 		/* fatal protocol error, close the socket.
> 		 * subflow_error_report() will introduce the appropriate barriers
> -- 
> 2.31.1
>
>
>

--
Mat Martineau
Intel

Patch

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6ab386ff3294..c2df5cc28ba1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -251,7 +251,21 @@  void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
 
 void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
 {
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
 	pr_debug("fail_seq=%llu", fail_seq);
+
+	if (!mptcp_has_another_subflow(sk)) {
+		if (!subflow->mp_fail_expect_echo) {
+			subflow->send_mp_fail = 1;
+		} else {
+			subflow->mp_fail_expect_echo = 0;
+			/* TODO the single-subflow case is temporarily
+			 * handled by reset.
+			 */
+			mptcp_subflow_reset(sk);
+		}
+	}
 }
 
 /* path manager helpers */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 09d0e9406ea9..c46011318f65 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -434,6 +434,7 @@  struct mptcp_subflow_context {
 		backup : 1,
 		send_mp_prio : 1,
 		send_mp_fail : 1,
+		mp_fail_expect_echo : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
 		disposable : 1,	    /* ctx can be free at ulp release time */
@@ -615,6 +616,19 @@  static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 }
 
+static inline bool mptcp_has_another_subflow(struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+	mptcp_for_each_subflow(msk, tmp) {
+		if (tmp != subflow)
+			return true;
+	}
+
+	return false;
+}
+
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int __init mptcp_proto_v6_init(void);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1151926d335b..a69839520472 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -910,6 +910,7 @@  static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
 	if (unlikely(csum_fold(csum))) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+		subflow->send_mp_fail = 1;
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
 	}
 
@@ -1157,6 +1158,22 @@  static bool subflow_check_data_avail(struct sock *ssk)
 
 fallback:
 	/* RFC 8684 section 3.7. */
+	if (subflow->send_mp_fail) {
+		if (mptcp_has_another_subflow(ssk)) {
+			ssk->sk_err = EBADMSG;
+			tcp_set_state(ssk, TCP_CLOSE);
+			subflow->reset_transient = 0;
+			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+			tcp_send_active_reset(ssk, GFP_ATOMIC);
+			while ((skb = skb_peek(&ssk->sk_receive_queue)))
+				sk_eat_skb(ssk, skb);
+		} else {
+			subflow->mp_fail_expect_echo = 1;
+		}
+		WRITE_ONCE(subflow->data_avail, 0);
+		return true;
+	}
+
 	if (subflow->mp_join || subflow->fully_established) {
 		/* fatal protocol error, close the socket.
 		 * subflow_error_report() will introduce the appropriate barriers