Message ID | 138e3913108d313b11a261e6c9e3db2cc788183f.1631188109.git.geliangtang@xiaomi.com
State      | Superseded, archived
Series     | The infinite mapping support
On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote:
> From: Geliang Tang <geliangtang@xiaomi.com>
>
> This patch added a "noncontiguous" flag in the msk to track whether the
> data is contiguous on a subflow. When retransmission happens, we could
> set this flag, and once all retransmissions are DATA_ACK'd that flag
> could be cleared.
>
> When a bad checksum is detected and a single contiguous subflow is in
> use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.
>
> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
> ---
>  net/mptcp/protocol.c |  7 +++++++
>  net/mptcp/protocol.h |  2 ++
>  net/mptcp/subflow.c  | 12 ++++++------
>  3 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index bb8a2a231479..81ea03b9fff6 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
>
>  		dfrag_uncharge(sk, delta);
>  		cleaned = true;
> +
> +		if (dfrag->resend_count == 0)
> +			WRITE_ONCE(msk->noncontiguous, false);
>  	}
>
>  	/* all retransmitted data acked, recovery completed */
> @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
>  	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
>  	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
>  	dfrag->already_sent = 0;
> +	dfrag->resend_count = 0;
>  	dfrag->page = pfrag->page;
>
>  	return dfrag;
> @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
>  		dfrag->already_sent = max(dfrag->already_sent, info.sent);
>  		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
>  			 info.size_goal);
> +		dfrag->resend_count++;
> +		WRITE_ONCE(msk->noncontiguous, true);
>  	}
>
>  	release_sock(ssk);
> @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
>  	WRITE_ONCE(msk->fully_established, false);
>  	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
>  		WRITE_ONCE(msk->csum_enabled, true);
> +	WRITE_ONCE(msk->noncontiguous, false);
>
>  	msk->write_seq = subflow_req->idsn + 1;
>  	msk->snd_nxt = msk->write_seq;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index d3e6fd1615f1..011f84ae1593 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -213,6 +213,7 @@ struct mptcp_data_frag {
>  	u16 offset;
>  	u16 overhead;
>  	u16 already_sent;
> +	u16 resend_count;
>  	struct page *page;
>  };

Ouch, the above increases mptcp_data_frag size by 8 bytes, due to
holes.

Is this necessary? I think the packet scheduler never reinjects on a
single subflow. It used to do that, but it should not anymore.

Even if the scheduler does re-inject with a single subflow, can we
instead keep track of the last retransmitted sequence number in
__mptcp_retrans()?

The msk stream is 'contiguous' if and only if
'before64(last_retrans_seq, msk->snd_una)'.

/P
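As a rough sketch of the alternative Paolo outlines above: the msk->last_retrans_seq
field and the helper name below are illustrative assumptions, not code from this
series; only before64() and msk->snd_una are existing MPTCP definitions from
net/mptcp/protocol.h.

	/* Sketch only: the stream is contiguous once everything that was ever
	 * handed to a retransmission has been cumulatively DATA_ACK'd.
	 * __mptcp_retrans() would be the place that advances
	 * msk->last_retrans_seq to the end of each re-injected dfrag.
	 */
	static bool mptcp_stream_contiguous(const struct mptcp_sock *msk)
	{
		return before64(msk->last_retrans_seq, READ_ONCE(msk->snd_una));
	}

With that single 64-bit sequence number, no per-dfrag resend_count is needed and
mptcp_data_frag keeps its current size.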
On Thu, 9 Sep 2021, Paolo Abeni wrote:

> On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote:
>> From: Geliang Tang <geliangtang@xiaomi.com>
>>
>> This patch added a "noncontiguous" flag in the msk to track whether the
>> data is contiguous on a subflow. When retransmission happens, we could
>> set this flag, and once all retransmissions are DATA_ACK'd that flag
>> could be cleared.
>>
>> When a bad checksum is detected and a single contiguous subflow is in
>> use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.
>>
>> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
>> ---
>>  net/mptcp/protocol.c |  7 +++++++
>>  net/mptcp/protocol.h |  2 ++
>>  net/mptcp/subflow.c  | 12 ++++++------
>>  3 files changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index bb8a2a231479..81ea03b9fff6 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
>>
>>  		dfrag_uncharge(sk, delta);
>>  		cleaned = true;
>> +
>> +		if (dfrag->resend_count == 0)
>> +			WRITE_ONCE(msk->noncontiguous, false);
>>  	}
>>
>>  	/* all retransmitted data acked, recovery completed */
>> @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
>>  	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
>>  	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
>>  	dfrag->already_sent = 0;
>> +	dfrag->resend_count = 0;
>>  	dfrag->page = pfrag->page;
>>
>>  	return dfrag;
>> @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
>>  		dfrag->already_sent = max(dfrag->already_sent, info.sent);
>>  		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
>>  			 info.size_goal);
>> +		dfrag->resend_count++;
>> +		WRITE_ONCE(msk->noncontiguous, true);
>>  	}
>>
>>  	release_sock(ssk);
>> @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
>>  	WRITE_ONCE(msk->fully_established, false);
>>  	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
>>  		WRITE_ONCE(msk->csum_enabled, true);
>> +	WRITE_ONCE(msk->noncontiguous, false);
>>
>>  	msk->write_seq = subflow_req->idsn + 1;
>>  	msk->snd_nxt = msk->write_seq;
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index d3e6fd1615f1..011f84ae1593 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -213,6 +213,7 @@ struct mptcp_data_frag {
>>  	u16 offset;
>>  	u16 overhead;
>>  	u16 already_sent;
>> +	u16 resend_count;
>>  	struct page *page;
>>  };
>
> Ouch, the above increases mptcp_data_frag size by 8 bytes, due to
> holes.
>

What about rearranging the struct to eliminate the holes? (Full
disclosure: I asked Geliang to add this, but am open to other
solutions.)

I was also thinking it could be useful to have this information around
for retransmission metrics, but that doesn't seem too important.

> Is this necessary? I think the packet scheduler never reinjects on a
> single subflow. It used to do that, but it should not anymore.
>

There may be a single subflow now, but there could have been multiple
subflows (with unacked reinjections) a very short time ago.

> Even if the scheduler does re-inject with a single subflow, can we
> instead keep track of the last retransmitted sequence number in
> __mptcp_retrans()?
>
> The msk stream is 'contiguous' if and only if
> 'before64(last_retrans_seq, msk->snd_una)'.
>

If last_retrans_seq is checked only when msk->noncontiguous is true, I
think that works out.
Otherwise the last_retrans_seq is stale/invalid if retransmission never
happened, or any retransmissions have been fully acked.

--
Mat Martineau
Intel
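For readers less familiar with struct packing, the 8-byte growth Paolo points out
comes from pointer alignment rather than from the u16 itself. The following is a
standalone illustration with a simplified field set, not the actual
mptcp_data_frag (which also carries a list head and a 64-bit data_seq); pahole
shows the real layout.

	/* Why adding one u16 in front of a pointer member can cost 8 bytes:
	 * on an LP64 build the pointer must be 8-byte aligned, so padding is
	 * inserted ahead of it.
	 */
	#include <stdio.h>

	struct frag_before {
		unsigned short data_len, offset, overhead, already_sent; /* 8 bytes */
		void *page;                                              /* offset 8 */
	};

	struct frag_after {
		unsigned short data_len, offset, overhead, already_sent;
		unsigned short resend_count;    /* 10 bytes used ...            */
		void *page;                     /* ... padded up to offset 16   */
	};

	int main(void)
	{
		/* Typically prints "16 24" on LP64: a 2-byte field costs 8 bytes. */
		printf("%zu %zu\n", sizeof(struct frag_before),
		       sizeof(struct frag_after));
		return 0;
	}

Rearranging only helps if an existing hole can absorb the extra u16; when the
u16 members already pack tightly against the pointer, the growth is unavoidable.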
On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote:
> On Thu, 9 Sep 2021, Paolo Abeni wrote:
> > The msk stream is 'contiguous' if and only if
> > 'before64(last_retrans_seq, msk->snd_una)'.
>
> If last_retrans_seq is checked only when msk->noncontiguous is true, I
> think that works out. Otherwise the last_retrans_seq is stale/invalid
> if retransmission never happened, or any retransmissions have been
> fully acked.

I think we can just init 'last_retrans_seq' to initial_seq - 1 at msk
creation time, and then we could always use the above check:

	if (before64(last_retrans_seq, msk->snd_una))

WDYT?

Cheers,

Paolo
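A minimal sketch of that initialization, written as fragments rather than a
complete patch; the last_retrans_seq field and the exact call sites are
assumptions for illustration. One possible placement is where mptcp_sk_clone()
already sets up write_seq.

	/* At msk creation: write_seq is idsn + 1 at this point, so
	 * write_seq - 1 plays the role of Paolo's "initial_seq - 1" and is
	 * already before64() snd_una.
	 */
	msk->last_retrans_seq = msk->write_seq - 1;

	/* The contiguity check then needs no "was anything ever
	 * retransmitted?" guard; it is valid from the first byte onward:
	 */
	if (before64(msk->last_retrans_seq, READ_ONCE(msk->snd_una)))
		; /* stream is contiguous: no retransmitted data still unacked */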
On Fri, 10 Sep 2021, Paolo Abeni wrote:

> On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote:
>> On Thu, 9 Sep 2021, Paolo Abeni wrote:
>>> The msk stream is 'contiguous' if and only if
>>> 'before64(last_retrans_seq, msk->snd_una)'.
>>
>> If last_retrans_seq is checked only when msk->noncontiguous is true, I
>> think that works out. Otherwise the last_retrans_seq is stale/invalid
>> if retransmission never happened, or any retransmissions have been
>> fully acked.
>
> I think we can just init 'last_retrans_seq' to initial_seq - 1 at msk
> creation time, and then we could always use the above check:
>
> 	if (before64(last_retrans_seq, msk->snd_una))
>
> WDYT?
>

Ah, ok. Initialize it like that, and then keep moving last_retrans_seq
ahead when updating snd_una while the MPTCP stream *is* contiguous, so
we don't run into problems when snd_una wraps around relative to
last_retrans_seq.

And be sure to set last_retrans_seq to the sequence number for the end
of the retransmitted data when entering recovery as well as in
__mptcp_retrans().

--
Mat Martineau
Intel
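Mat's two requirements can be put into a rough sketch as follows. The
last_retrans_seq field, the helper names, and the call sites are hypothetical,
and this is not the patch that was eventually applied; it only illustrates the
bookkeeping being discussed.

	/* 1) While the stream is contiguous, drag last_retrans_seq along with
	 *    snd_una so a wrapping snd_una can never end up "behind" a stale
	 *    value.  Call from the snd_una update path.
	 */
	static void mptcp_advance_una(struct mptcp_sock *msk, u64 new_snd_una)
	{
		if (before64(msk->last_retrans_seq, msk->snd_una))
			msk->last_retrans_seq = new_snd_una - 1;
		WRITE_ONCE(msk->snd_una, new_snd_una);
	}

	/* 2) On every re-injection (__mptcp_retrans(), and when entering
	 *    recovery), remember the end of the retransmitted range.
	 */
	static void mptcp_note_retrans(struct mptcp_sock *msk,
				       const struct mptcp_data_frag *dfrag)
	{
		u64 end_seq = dfrag->data_seq + dfrag->already_sent;

		if (before64(msk->last_retrans_seq, end_seq - 1))
			msk->last_retrans_seq = end_seq - 1;
	}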
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bb8a2a231479..81ea03b9fff6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
 
 		dfrag_uncharge(sk, delta);
 		cleaned = true;
+
+		if (dfrag->resend_count == 0)
+			WRITE_ONCE(msk->noncontiguous, false);
 	}
 
 	/* all retransmitted data acked, recovery completed */
@@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
 	dfrag->already_sent = 0;
+	dfrag->resend_count = 0;
 	dfrag->page = pfrag->page;
 
 	return dfrag;
@@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
 		dfrag->already_sent = max(dfrag->already_sent, info.sent);
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
+		dfrag->resend_count++;
+		WRITE_ONCE(msk->noncontiguous, true);
 	}
 
 	release_sock(ssk);
@@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	WRITE_ONCE(msk->fully_established, false);
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
 		WRITE_ONCE(msk->csum_enabled, true);
+	WRITE_ONCE(msk->noncontiguous, false);
 
 	msk->write_seq = subflow_req->idsn + 1;
 	msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..011f84ae1593 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -213,6 +213,7 @@ struct mptcp_data_frag {
 	u16 offset;
 	u16 overhead;
 	u16 already_sent;
+	u16 resend_count;
 	struct page *page;
 };
 
@@ -249,6 +250,7 @@ struct mptcp_sock {
 	bool rcv_fastclose;
 	bool use_64bit_ack; /* Set when we received a 64-bit DSN */
 	bool csum_enabled;
+	bool noncontiguous;
 	spinlock_t join_list_lock;
 	struct work_struct work;
 	struct sk_buff *ooo_last_skb;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..951aafb6021e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1166,15 +1166,15 @@ static bool subflow_check_data_avail(struct sock *ssk)
 fallback:
 	/* RFC 8684 section 3.7. */
 	if (subflow->send_mp_fail) {
-		if (mptcp_has_another_subflow(ssk)) {
+		if (mptcp_has_another_subflow(ssk) || READ_ONCE(msk->noncontiguous)) {
+			ssk->sk_err = EBADMSG;
+			tcp_set_state(ssk, TCP_CLOSE);
+			subflow->reset_transient = 0;
+			subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+			tcp_send_active_reset(ssk, GFP_ATOMIC);
 			while ((skb = skb_peek(&ssk->sk_receive_queue)))
 				sk_eat_skb(ssk, skb);
 		}
-		ssk->sk_err = EBADMSG;
-		tcp_set_state(ssk, TCP_CLOSE);
-		subflow->reset_transient = 0;
-		subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
-		tcp_send_active_reset(ssk, GFP_ATOMIC);
 		WRITE_ONCE(subflow->data_avail, 0);
 		return true;
 	}