diff mbox series

[mptcp-next,v2,1/9] mptcp: add noncontiguous flag

Message ID 138e3913108d313b11a261e6c9e3db2cc788183f.1631188109.git.geliangtang@xiaomi.com (mailing list archive)
State Superseded, archived
Headers show
Series The infinite mapping support | expand

Commit Message

Geliang Tang Sept. 9, 2021, 11:51 a.m. UTC
From: Geliang Tang <geliangtang@xiaomi.com>

This patch added a "noncontiguous" flag in the msk to track whether the
data is contiguous on a subflow. When retransmission happens, we could
set this flag, and once all retransmissions are DATA_ACK'd that flag
could be cleared.

When a bad checksum is detected and a single contiguous subflow is in
use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.

Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
---
 net/mptcp/protocol.c |  7 +++++++
 net/mptcp/protocol.h |  2 ++
 net/mptcp/subflow.c  | 12 ++++++------
 3 files changed, 15 insertions(+), 6 deletions(-)

Comments

Paolo Abeni Sept. 9, 2021, 1:25 p.m. UTC | #1
On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote:
> From: Geliang Tang <geliangtang@xiaomi.com>
> 
> This patch added a "noncontiguous" flag in the msk to track whether the
> data is contiguous on a subflow. When retransmission happens, we could
> set this flag, and once all retransmissions are DATA_ACK'd that flag
> could be cleared.
> 
> When a bad checksum is detected and a single contiguous subflow is in
> use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.
> 
> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
> ---
>  net/mptcp/protocol.c |  7 +++++++
>  net/mptcp/protocol.h |  2 ++
>  net/mptcp/subflow.c  | 12 ++++++------
>  3 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index bb8a2a231479..81ea03b9fff6 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
>  
>  		dfrag_uncharge(sk, delta);
>  		cleaned = true;
> +
> +		if (dfrag->resend_count == 0)
> +			WRITE_ONCE(msk->noncontiguous, false);
>  	}
>  
>  	/* all retransmitted data acked, recovery completed */
> @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
>  	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
>  	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
>  	dfrag->already_sent = 0;
> +	dfrag->resend_count = 0;
>  	dfrag->page = pfrag->page;
>  
>  	return dfrag;
> @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
>  		dfrag->already_sent = max(dfrag->already_sent, info.sent);
>  		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
>  			 info.size_goal);
> +		dfrag->resend_count++;
> +		WRITE_ONCE(msk->noncontiguous, true);
>  	}
>  
>  	release_sock(ssk);
> @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
>  	WRITE_ONCE(msk->fully_established, false);
>  	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
>  		WRITE_ONCE(msk->csum_enabled, true);
> +	WRITE_ONCE(msk->noncontiguous, false);
>  
>  	msk->write_seq = subflow_req->idsn + 1;
>  	msk->snd_nxt = msk->write_seq;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index d3e6fd1615f1..011f84ae1593 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -213,6 +213,7 @@ struct mptcp_data_frag {
>  	u16 offset;
>  	u16 overhead;
>  	u16 already_sent;
> +	u16 resend_count;
>  	struct page *page;
>  };

Ouch, the above increases mptcp_data_frag size by 8 bytes, due to
holes.

Is this necessary? I think the packet scheduler never reinject with a
single subflow. It used to do that, but it should not do anymore.

Even if the scheduler re-inject with a single subflow, can we instead
keep track of the last retrans sequence number - in __mptcp_retrans().

msk stream is 'contiguous' if and only if 'before64(last_retrans_seq,
msk->snd_una)'

/P
Mat Martineau Sept. 10, 2021, midnight UTC | #2
On Thu, 9 Sep 2021, Paolo Abeni wrote:

> On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote:
>> From: Geliang Tang <geliangtang@xiaomi.com>
>>
>> This patch added a "noncontiguous" flag in the msk to track whether the
>> data is contiguous on a subflow. When retransmission happens, we could
>> set this flag, and once all retransmissions are DATA_ACK'd that flag
>> could be cleared.
>>
>> When a bad checksum is detected and a single contiguous subflow is in
>> use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.
>>
>> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
>> ---
>>  net/mptcp/protocol.c |  7 +++++++
>>  net/mptcp/protocol.h |  2 ++
>>  net/mptcp/subflow.c  | 12 ++++++------
>>  3 files changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index bb8a2a231479..81ea03b9fff6 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
>>
>>  		dfrag_uncharge(sk, delta);
>>  		cleaned = true;
>> +
>> +		if (dfrag->resend_count == 0)
>> +			WRITE_ONCE(msk->noncontiguous, false);
>>  	}
>>
>>  	/* all retransmitted data acked, recovery completed */
>> @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
>>  	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
>>  	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
>>  	dfrag->already_sent = 0;
>> +	dfrag->resend_count = 0;
>>  	dfrag->page = pfrag->page;
>>
>>  	return dfrag;
>> @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
>>  		dfrag->already_sent = max(dfrag->already_sent, info.sent);
>>  		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
>>  			 info.size_goal);
>> +		dfrag->resend_count++;
>> +		WRITE_ONCE(msk->noncontiguous, true);
>>  	}
>>
>>  	release_sock(ssk);
>> @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
>>  	WRITE_ONCE(msk->fully_established, false);
>>  	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
>>  		WRITE_ONCE(msk->csum_enabled, true);
>> +	WRITE_ONCE(msk->noncontiguous, false);
>>
>>  	msk->write_seq = subflow_req->idsn + 1;
>>  	msk->snd_nxt = msk->write_seq;
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index d3e6fd1615f1..011f84ae1593 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -213,6 +213,7 @@ struct mptcp_data_frag {
>>  	u16 offset;
>>  	u16 overhead;
>>  	u16 already_sent;
>> +	u16 resend_count;
>>  	struct page *page;
>>  };
>
> Ouch, the above increases mptcp_data_frag size by 8 bytes, due to
> holes.
>

What about rearranging the struct to eliminate the holes? (Full 
disclosure: I asked Geliang to add this, but am open to other solutions)

I was also thinking it could be useful to have this information around for 
retransmission metrics, but that doesn't seem too important.

> Is this necessary? I think the packet scheduler never reinject with a
> single subflow. It used to do that, but it should not do anymore.
>

There may be a single subflow now, but there may have been multiple 
subflows (with unacked reinjections) a very short time ago.

> Even if the scheduler re-inject with a single subflow, can we instead
> keep track of the last retrans sequence number - in __mptcp_retrans().
>
> msk stream is 'continuous' if and only if 'before64(last_retrnas_seq,
> msk->snd_una)'
>

If last_retrans_seq is checked only when msk->noncontiguous is true, I 
think that works out. Otherwise the last_retrans_seq is stale/invalid if 
retransmission never happened, or any retransmissions have been fully 
acked.


--
Mat Martineau
Intel
Paolo Abeni Sept. 10, 2021, 3:08 p.m. UTC | #3
On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote:
> > On Thu, 9 Sep 2021, Paolo Abeni wrote:
> > msk stream is 'contiguous' if and only if 'before64(last_retrans_seq,
> > msk->snd_una)'
> > 
> 
> If last_retrans_seq is checked only when msk->noncontiguous is true, I 
> think that works out. Otherwise the last_retrans_seq is stale/invalid if 
> retransmission never happened, or any retransmissions have been fully 
> acked.

I think we can just init 'last_retrans_seq' to initial_seq - 1 at msk
creation time and we could always use the above check:

	if (before64(last_retrans_seq, msk->snd_una))

WDYT?

Cheers,

Paolo
Mat Martineau Sept. 10, 2021, 4:49 p.m. UTC | #4
On Fri, 10 Sep 2021, Paolo Abeni wrote:

> On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote:
>>> On Thu, 9 Sep 2021, Paolo Abeni wrote:
>>> msk stream is 'contiguous' if and only if 'before64(last_retrans_seq,
>>> msk->snd_una)'
>>>
>>
>> If last_retrans_seq is checked only when msk->noncontiguous is true, I
>> think that works out. Otherwise the last_retrans_seq is stale/invalid if
>> retransmission never happened, or any retransmissions have been fully
>> acked.
>
> I think we can just init 'last_retrans_seq' to initial_seq - 1 at msk
> creation time and we could always use the above check:
>
> 	if (before64(last_retrans_seq, msk->snd_una))
>
> WDYT?
>

Ah, ok. Initialize it like that, and then keep moving last_retrans_seq 
ahead when updating snd_una and the MPTCP stream *is* contiguous, so we 
don't run in to problems when snd_una wraps around relative to 
last_retrans_seq.

And be sure to set last_retrans_seq to the sequence number for the end of 
the retransmitted data when entering recovery as well as in 
__mptcp_retrans().

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bb8a2a231479..81ea03b9fff6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1095,6 +1095,9 @@  static void __mptcp_clean_una(struct sock *sk)
 
 		dfrag_uncharge(sk, delta);
 		cleaned = true;
+
+		if (dfrag->resend_count == 0)
+			WRITE_ONCE(msk->noncontiguous, false);
 	}
 
 	/* all retransmitted data acked, recovery completed */
@@ -1171,6 +1174,7 @@  mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
 	dfrag->already_sent = 0;
+	dfrag->resend_count = 0;
 	dfrag->page = pfrag->page;
 
 	return dfrag;
@@ -2454,6 +2458,8 @@  static void __mptcp_retrans(struct sock *sk)
 		dfrag->already_sent = max(dfrag->already_sent, info.sent);
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
+		dfrag->resend_count++;
+		WRITE_ONCE(msk->noncontiguous, true);
 	}
 
 	release_sock(ssk);
@@ -2872,6 +2878,7 @@  struct sock *mptcp_sk_clone(const struct sock *sk,
 	WRITE_ONCE(msk->fully_established, false);
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
 		WRITE_ONCE(msk->csum_enabled, true);
+	WRITE_ONCE(msk->noncontiguous, false);
 
 	msk->write_seq = subflow_req->idsn + 1;
 	msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..011f84ae1593 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -213,6 +213,7 @@  struct mptcp_data_frag {
 	u16 offset;
 	u16 overhead;
 	u16 already_sent;
+	u16 resend_count;
 	struct page *page;
 };
 
@@ -249,6 +250,7 @@  struct mptcp_sock {
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	bool		csum_enabled;
+	bool		noncontiguous;
 	spinlock_t	join_list_lock;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..951aafb6021e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1166,15 +1166,15 @@  static bool subflow_check_data_avail(struct sock *ssk)
 fallback:
 	/* RFC 8684 section 3.7. */
 	if (subflow->send_mp_fail) {
-		if (mptcp_has_another_subflow(ssk)) {
+		if (mptcp_has_another_subflow(ssk) || READ_ONCE(msk->noncontiguous)) {
+			ssk->sk_err = EBADMSG;
+			tcp_set_state(ssk, TCP_CLOSE);
+			subflow->reset_transient = 0;
+			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+			tcp_send_active_reset(ssk, GFP_ATOMIC);
 			while ((skb = skb_peek(&ssk->sk_receive_queue)))
 				sk_eat_skb(ssk, skb);
 		}
-		ssk->sk_err = EBADMSG;
-		tcp_set_state(ssk, TCP_CLOSE);
-		subflow->reset_transient = 0;
-		subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
-		tcp_send_active_reset(ssk, GFP_ATOMIC);
 		WRITE_ONCE(subflow->data_avail, 0);
 		return true;
 	}