
tcp: del skb from tsorted_sent_queue after mark it as lost

Message ID 1661761242-7849-1-git-send-email-liyonglong@chinatelecom.cn (mailing list archive)
State Rejected
Delegated to: Netdev Maintainers
Series tcp: del skb from tsorted_sent_queue after mark it as lost

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/cc_maintainers warning 1 maintainers not CCed: yoshfuji@linux-ipv6.org
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 40 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

YonglongLi Aug. 29, 2022, 8:20 a.m. UTC
If RACK is enabled, an skb can be removed from tsorted_sent_queue as soon
as it is marked lost. This reduces the number of iterations over
tsorted_sent_queue in tcp_rack_detect_loss().

Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
---
 net/ipv4/tcp_input.c    | 15 +++++++++------
 net/ipv4/tcp_recovery.c |  1 -
 2 files changed, 9 insertions(+), 7 deletions(-)
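
For context, here is a simplified sketch of the loop this patch is trying to
shorten, paraphrased from net/ipv4/tcp_recovery.c of roughly this vintage (the
helper names are real, but details may differ slightly from any given tree):

----
/* Sketch of RACK loss detection (not verbatim kernel code): walk the
 * time-sorted sent queue and mark as lost any skb sent sufficiently long
 * before the most recently (s)acked one.
 */
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb, *n;
	u32 reo_wnd = tcp_rack_reo_wnd(sk);

	*reo_timeout = 0;
	list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
				 tcp_tsorted_anchor) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		s32 remaining;

		/* Skip skbs already marked lost and not yet retransmitted;
		 * the patch aims to stop revisiting these at all by
		 * unlinking them in tcp_mark_skb_lost().
		 */
		if ((scb->sacked & TCPCB_LOST) &&
		    !(scb->sacked & TCPCB_SACKED_RETRANS))
			continue;

		/* The list is sorted by (re)transmit time, so stop at the
		 * first skb sent after the most recently (s)acked one.
		 */
		if (!tcp_rack_sent_after(tp->rack.mstamp,
					 tcp_skb_timestamp_us(skb),
					 tp->rack.end_seq, scb->end_seq))
			break;

		remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
		if (remaining <= 0) {
			tcp_mark_skb_lost(sk, skb);
			list_del_init(&skb->tcp_tsorted_anchor);
		} else {
			/* Record maximum wait time for the reordering timer */
			*reo_timeout = max_t(u32, *reo_timeout, remaining);
		}
	}
}
----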

Comments

Yuchung Cheng Aug. 30, 2022, 12:23 a.m. UTC | #1
On Mon, Aug 29, 2022 at 1:21 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
>
> if rack is enabled, when skb marked as lost we can remove it from
> tsorted_sent_queue. It will reduces the iterations on tsorted_sent_queue
> in tcp_rack_detect_loss

Did you test the case where an skb is marked lost again after
retransmission? I can't quite remember the reason I avoided this
optimization. Let me run some tests and get back to you.


>
> Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
> ---
>  net/ipv4/tcp_input.c    | 15 +++++++++------
>  net/ipv4/tcp_recovery.c |  1 -
>  2 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index ab5f0ea..01bd644 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1082,6 +1082,12 @@ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff
>         tp->lost += tcp_skb_pcount(skb);
>  }
>
> +static bool tcp_is_rack(const struct sock *sk)
> +{
> +       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
> +               TCP_RACK_LOSS_DETECTION;
> +}
> +
>  void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
>  {
>         __u8 sacked = TCP_SKB_CB(skb)->sacked;
> @@ -1105,6 +1111,9 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
>                 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
>                 tcp_notify_skb_loss_event(tp, skb);
>         }
> +
> +       if (tcp_is_rack(sk))
> +               list_del_init(&skb->tcp_tsorted_anchor);
>  }
>
>  /* Updates the delivered and delivered_ce counts */
> @@ -2093,12 +2102,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
>         tp->undo_retrans = tp->retrans_out ? : -1;
>  }
>
> -static bool tcp_is_rack(const struct sock *sk)
> -{
> -       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
> -               TCP_RACK_LOSS_DETECTION;
> -}
> -
>  /* If we detect SACK reneging, forget all SACK information
>   * and reset tags completely, otherwise preserve SACKs. If receiver
>   * dropped its ofo queue, we will know this due to reneging detection.
> diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
> index 50abaa9..ba52ec9e 100644
> --- a/net/ipv4/tcp_recovery.c
> +++ b/net/ipv4/tcp_recovery.c
> @@ -84,7 +84,6 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
>                 remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
>                 if (remaining <= 0) {
>                         tcp_mark_skb_lost(sk, skb);
> -                       list_del_init(&skb->tcp_tsorted_anchor);
>                 } else {
>                         /* Record maximum wait time */
>                         *reo_timeout = max_t(u32, *reo_timeout, remaining);
> --
> 1.8.3.1
>
Yuchung Cheng Aug. 31, 2022, 5:58 a.m. UTC | #2
On Mon, Aug 29, 2022 at 5:23 PM Yuchung Cheng <ycheng@google.com> wrote:
>
> On Mon, Aug 29, 2022 at 1:21 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
> >
> > if rack is enabled, when skb marked as lost we can remove it from
> > tsorted_sent_queue. It will reduces the iterations on tsorted_sent_queue
> > in tcp_rack_detect_loss
>
> Did you test the case where an skb is marked lost again after
> retransmission? I can't quite remember the reason I avoided this
> optimization. let me run some test and get back to you.
As I suspected, this patch fails to pass our packetdrill tests.

It breaks detection of retransmitted packets that get lost again,
because they have already been removed from the tsorted list when
they were lost the first time.



>
>
> >
> > Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
> > ---
> >  net/ipv4/tcp_input.c    | 15 +++++++++------
> >  net/ipv4/tcp_recovery.c |  1 -
> >  2 files changed, 9 insertions(+), 7 deletions(-)
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index ab5f0ea..01bd644 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -1082,6 +1082,12 @@ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff
> >         tp->lost += tcp_skb_pcount(skb);
> >  }
> >
> > +static bool tcp_is_rack(const struct sock *sk)
> > +{
> > +       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
> > +               TCP_RACK_LOSS_DETECTION;
> > +}
> > +
> >  void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
> >  {
> >         __u8 sacked = TCP_SKB_CB(skb)->sacked;
> > @@ -1105,6 +1111,9 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
> >                 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
> >                 tcp_notify_skb_loss_event(tp, skb);
> >         }
> > +
> > +       if (tcp_is_rack(sk))
> > +               list_del_init(&skb->tcp_tsorted_anchor);
> >  }
> >
> >  /* Updates the delivered and delivered_ce counts */
> > @@ -2093,12 +2102,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
> >         tp->undo_retrans = tp->retrans_out ? : -1;
> >  }
> >
> > -static bool tcp_is_rack(const struct sock *sk)
> > -{
> > -       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
> > -               TCP_RACK_LOSS_DETECTION;
> > -}
> > -
> >  /* If we detect SACK reneging, forget all SACK information
> >   * and reset tags completely, otherwise preserve SACKs. If receiver
> >   * dropped its ofo queue, we will know this due to reneging detection.
> > diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
> > index 50abaa9..ba52ec9e 100644
> > --- a/net/ipv4/tcp_recovery.c
> > +++ b/net/ipv4/tcp_recovery.c
> > @@ -84,7 +84,6 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
> >                 remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
> >                 if (remaining <= 0) {
> >                         tcp_mark_skb_lost(sk, skb);
> > -                       list_del_init(&skb->tcp_tsorted_anchor);
> >                 } else {
> >                         /* Record maximum wait time */
> >                         *reo_timeout = max_t(u32, *reo_timeout, remaining);
> > --
> > 1.8.3.1
> >
YonglongLi Aug. 31, 2022, 7:19 a.m. UTC | #3
On 8/31/2022 1:58 PM, Yuchung Cheng wrote:
> On Mon, Aug 29, 2022 at 5:23 PM Yuchung Cheng <ycheng@google.com> wrote:
>>
>> On Mon, Aug 29, 2022 at 1:21 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
>>>
>>> if rack is enabled, when skb marked as lost we can remove it from
>>> tsorted_sent_queue. It will reduces the iterations on tsorted_sent_queue
>>> in tcp_rack_detect_loss
>>
>> Did you test the case where an skb is marked lost again after
>> retransmission? I can't quite remember the reason I avoided this
>> optimization. let me run some test and get back to you.
> As I suspected, this patch fails to pass our packet drill tests.
> 
> It breaks detecting retransmitted packets that
> get lost again, b/c they have already been removed from the tsorted
> list when they get lost the first time.
> 
> 

Hi Yuchung,
Thank you for your feedback.
But I do not quite understand: in the current implementation, if an skb
is retransmitted and later marked lost again, it will have been added back
to the tail of tsorted_sent_queue by tcp_update_skb_after_send() at
retransmit time.
Am I missing some code?
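
For reference, the re-insertion being described appears to be the
list_move_tail() at the end of tcp_update_skb_after_send() in
net/ipv4/tcp_output.c, which runs on every transmission and retransmission.
A trimmed sketch, not verbatim kernel code:

----
static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
				      u64 prior_wstamp)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* ... pacing timestamp bookkeeping elided ... */

	/* Every (re)transmission moves the skb back to the tail of the
	 * time-sorted list, so an skb that is actually retransmitted does
	 * reappear in tsorted_sent_queue.  The problematic case, as the
	 * replies below explain, is an skb that is unlinked on loss but
	 * never retransmitted before the loss marking is undone.
	 */
	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
----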

> 
>>
>>
>>>
>>> Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
>>> ---
>>>  net/ipv4/tcp_input.c    | 15 +++++++++------
>>>  net/ipv4/tcp_recovery.c |  1 -
>>>  2 files changed, 9 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>>> index ab5f0ea..01bd644 100644
>>> --- a/net/ipv4/tcp_input.c
>>> +++ b/net/ipv4/tcp_input.c
>>> @@ -1082,6 +1082,12 @@ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff
>>>         tp->lost += tcp_skb_pcount(skb);
>>>  }
>>>
>>> +static bool tcp_is_rack(const struct sock *sk)
>>> +{
>>> +       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
>>> +               TCP_RACK_LOSS_DETECTION;
>>> +}
>>> +
>>>  void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
>>>  {
>>>         __u8 sacked = TCP_SKB_CB(skb)->sacked;
>>> @@ -1105,6 +1111,9 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
>>>                 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
>>>                 tcp_notify_skb_loss_event(tp, skb);
>>>         }
>>> +
>>> +       if (tcp_is_rack(sk))
>>> +               list_del_init(&skb->tcp_tsorted_anchor);
>>>  }
>>>
>>>  /* Updates the delivered and delivered_ce counts */
>>> @@ -2093,12 +2102,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
>>>         tp->undo_retrans = tp->retrans_out ? : -1;
>>>  }
>>>
>>> -static bool tcp_is_rack(const struct sock *sk)
>>> -{
>>> -       return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
>>> -               TCP_RACK_LOSS_DETECTION;
>>> -}
>>> -
>>>  /* If we detect SACK reneging, forget all SACK information
>>>   * and reset tags completely, otherwise preserve SACKs. If receiver
>>>   * dropped its ofo queue, we will know this due to reneging detection.
>>> diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
>>> index 50abaa9..ba52ec9e 100644
>>> --- a/net/ipv4/tcp_recovery.c
>>> +++ b/net/ipv4/tcp_recovery.c
>>> @@ -84,7 +84,6 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
>>>                 remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
>>>                 if (remaining <= 0) {
>>>                         tcp_mark_skb_lost(sk, skb);
>>> -                       list_del_init(&skb->tcp_tsorted_anchor);
>>>                 } else {
>>>                         /* Record maximum wait time */
>>>                         *reo_timeout = max_t(u32, *reo_timeout, remaining);
>>> --
>>> 1.8.3.1
>>>
>
Neal Cardwell Aug. 31, 2022, 12:46 p.m. UTC | #4
On Wed, Aug 31, 2022 at 3:19 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
>
>
>
> On 8/31/2022 1:58 PM, Yuchung Cheng wrote:
> > On Mon, Aug 29, 2022 at 5:23 PM Yuchung Cheng <ycheng@google.com> wrote:
> >>
> >> On Mon, Aug 29, 2022 at 1:21 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
> >>>
> >>> if rack is enabled, when skb marked as lost we can remove it from
> >>> tsorted_sent_queue. It will reduces the iterations on tsorted_sent_queue
> >>> in tcp_rack_detect_loss
> >>
> >> Did you test the case where an skb is marked lost again after
> >> retransmission? I can't quite remember the reason I avoided this
> >> optimization. let me run some test and get back to you.
> > As I suspected, this patch fails to pass our packet drill tests.
> >
> > It breaks detecting retransmitted packets that
> > get lost again, b/c they have already been removed from the tsorted
> > list when they get lost the first time.
> >
> >
>
> Hi Yuchung,
> Thank you for your feelback.
> But I am not quite understand. in the current implementation, if an skb
> is marked lost again after retransmission, it will be added to tail of
> tsorted_sent_queue again in tcp_update_skb_after_send.
> Do I miss some code?

That's correct, but in the kind of scenario Yuchung is talking about,
the skb is not retransmitted again.

To clarify, here is an example snippet of a test written by Yuchung
that covers this kind of case:

----
`../common/defaults.sh`

    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
 +.02 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4
   +0 write(4, ..., 16000) = 16000
   +0 > P. 1:10001(10000) ack 1

// TLP (but it is dropped too so no ack for it)
 +.04 > . 10001:11001(1000) ack 1

// RTO and retransmit head
 +.22 > . 1:1001(1000) ack 1

// ACK was lost. But the (spurious) retransmit induced a DSACK.
// So total this ack hints two packets (original & dup).
// Undo cwnd and ssthresh.
 +.01 < . 1:1(0) ack 1001 win 257 <sack 1:1001,nop,nop>
   +0 > P. 11001:13001(2000) ack 1
   +0 %{
assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd
assert tcpi_snd_ssthresh > 1000000, tcpi_snd_ssthresh
}%

// TLP to discover the real losses 1001:11001(10000)
 +.04 > . 13001:14001(1000) ack 1

// Fast recovery. PRR first then PRR-SS after retransmits are acked
 +.01 < . 1:1(0) ack 1001 win 257 <sack 11001:12001,nop,nop>
   +0 > . 1001:2001(1000) ack 1
----

In this test case, with the proposed patch in this thread applied, the
final 1001:2001(1000) skb is transmitted 440ms later, after an RTO.
AFAICT that's because the 1001:2001(1000) skb was removed from the
tsorted list upon the original (spurious) RTO but not re-added upon
the undo of that spurious RTO.

best regards,
neal
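
One way to see why the undo leaves that skb stranded: the loss-undo path
clears the TCPCB_LOST bit by walking the retransmit rbtree, but it never
touches tcp_tsorted_anchor. A paraphrased sketch of that path (based on
tcp_undo_cwnd_reduction() in net/ipv4/tcp_input.c; not verbatim, details may
differ by tree):

----
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (unmark_loss) {
		struct sk_buff *skb;

		/* Undo only clears the LOST mark; nothing here re-links the
		 * skb into tp->tsorted_sent_queue.  With the proposed patch
		 * the skb was already unlinked when it was (spuriously)
		 * marked lost, so RACK can never time it out again and the
		 * flow falls back to an RTO, as seen in the test above.
		 */
		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		}
		tp->lost_out = 0;
		tcp_clear_all_retrans_hints(tp);
	}

	/* ... cwnd and ssthresh restoration elided ... */
}
----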
YonglongLi Sept. 1, 2022, 7:38 a.m. UTC | #5
Hi Neal, Yuchung,

I get your point. Thank you for your patient feedback.

On 8/31/2022 8:46 PM, Neal Cardwell wrote:
> On Wed, Aug 31, 2022 at 3:19 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
>>
>>
>>
>> On 8/31/2022 1:58 PM, Yuchung Cheng wrote:
>>> On Mon, Aug 29, 2022 at 5:23 PM Yuchung Cheng <ycheng@google.com> wrote:
>>>>
>>>> On Mon, Aug 29, 2022 at 1:21 AM Yonglong Li <liyonglong@chinatelecom.cn> wrote:
>>>>>
>>>>> if rack is enabled, when skb marked as lost we can remove it from
>>>>> tsorted_sent_queue. It will reduces the iterations on tsorted_sent_queue
>>>>> in tcp_rack_detect_loss
>>>>
>>>> Did you test the case where an skb is marked lost again after
>>>> retransmission? I can't quite remember the reason I avoided this
>>>> optimization. let me run some test and get back to you.
>>> As I suspected, this patch fails to pass our packet drill tests.
>>>
>>> It breaks detecting retransmitted packets that
>>> get lost again, b/c they have already been removed from the tsorted
>>> list when they get lost the first time.
>>>
>>>
>>
>> Hi Yuchung,
>> Thank you for your feelback.
>> But I am not quite understand. in the current implementation, if an skb
>> is marked lost again after retransmission, it will be added to tail of
>> tsorted_sent_queue again in tcp_update_skb_after_send.
>> Do I miss some code?
> 
> That's correct, but in the kind of scenario Yuchung is talking about,
> the skb is not retransmitted again.
> 
> To clarify, here is an example snippet of a test written by Yuchung
> that covers this kind of case:
> 
> ----
> `../common/defaults.sh`
> 
>     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
>    +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
>    +0 bind(3, ..., ...) = 0
>    +0 listen(3, 1) = 0
> 
>    +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
>    +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
>  +.02 < . 1:1(0) ack 1 win 257
>    +0 accept(3, ..., ...) = 4
>    +0 write(4, ..., 16000) = 16000
>    +0 > P. 1:10001(10000) ack 1
> 
> // TLP (but it is dropped too so no ack for it)
>  +.04 > . 10001:11001(1000) ack 1
> 
> // RTO and retransmit head
>  +.22 > . 1:1001(1000) ack 1
> 
> // ACK was lost. But the (spurious) retransmit induced a DSACK.
> // So total this ack hints two packets (original & dup).
> // Undo cwnd and ssthresh.
>  +.01 < . 1:1(0) ack 1001 win 257 <sack 1:1001,nop,nop>
>    +0 > P. 11001:13001(2000) ack 1
>    +0 %{
> assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd
> assert tcpi_snd_ssthresh > 1000000, tcpi_snd_ssthresh
> }%
> 
> // TLP to discover the real losses 1001:11001(10000)
>  +.04 > . 13001:14001(1000) ack 1
> 
> // Fast recovery. PRR first then PRR-SS after retransmits are acked
>  +.01 < . 1:1(0) ack 1001 win 257 <sack 11001:12001,nop,nop>
>    +0 > . 1001:2001(1000) ack 1
> ----
> 
> In this test case, with the proposed patch in this thread applied, the
> final 1001:2001(1000) skb is transmitted 440ms later, after an RTO.
> AFAICT that's because the 1001:2001(1000) skb was removed from the
> tsorted list upon the original (spurious RTO) but not re-added upon
> the undo of that spurious RTO.
> 
> best regards,
> neal
>

Patch

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ab5f0ea..01bd644 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1082,6 +1082,12 @@  static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff
 	tp->lost += tcp_skb_pcount(skb);
 }
 
+static bool tcp_is_rack(const struct sock *sk)
+{
+	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+		TCP_RACK_LOSS_DETECTION;
+}
+
 void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 {
 	__u8 sacked = TCP_SKB_CB(skb)->sacked;
@@ -1105,6 +1111,9 @@  void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 		tcp_notify_skb_loss_event(tp, skb);
 	}
+
+	if (tcp_is_rack(sk))
+		list_del_init(&skb->tcp_tsorted_anchor);
 }
 
 /* Updates the delivered and delivered_ce counts */
@@ -2093,12 +2102,6 @@  static inline void tcp_init_undo(struct tcp_sock *tp)
 	tp->undo_retrans = tp->retrans_out ? : -1;
 }
 
-static bool tcp_is_rack(const struct sock *sk)
-{
-	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
-		TCP_RACK_LOSS_DETECTION;
-}
-
 /* If we detect SACK reneging, forget all SACK information
  * and reset tags completely, otherwise preserve SACKs. If receiver
  * dropped its ofo queue, we will know this due to reneging detection.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 50abaa9..ba52ec9e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -84,7 +84,6 @@  static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 		remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
 		if (remaining <= 0) {
 			tcp_mark_skb_lost(sk, skb);
-			list_del_init(&skb->tcp_tsorted_anchor);
 		} else {
 			/* Record maximum wait time */
 			*reo_timeout = max_t(u32, *reo_timeout, remaining);