[net-next] tcp: handle the deferred sack compression when it should be canceled

Message ID 20230530023737.584-1-kerneljasonxing@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series [net-next] tcp: handle the deferred sack compression when it should be canceled

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next

Commit Message

Jason Xing May 30, 2023, 2:37 a.m. UTC
From: Jason Xing <kernelxing@tencent.com>

The previous commits did not handle the case where we need to abort
sending an ACK that was triggered by the timer. Introduce a new flag,
ICSK_ACK_COMP_TIMER, used only for sack compression, to satisfy this
requirement.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
Comment on this:
This patch is based on top of commit [1]. I don't think the current
patch fixes a bug; it is more of an improvement, so I would like to
target the net-next tree.

[1]: https://patchwork.kernel.org/project/netdevbpf/patch/20230529113804.GA20300@didi-ThinkCentre-M920t-N000/
---
 include/net/inet_connection_sock.h | 3 ++-
 net/ipv4/tcp_input.c               | 6 +++++-
 net/ipv4/tcp_output.c              | 3 +++
 net/ipv4/tcp_timer.c               | 5 ++++-
 4 files changed, 14 insertions(+), 3 deletions(-)

Comments

Eric Dumazet May 30, 2023, 7:09 a.m. UTC | #1
On Tue, May 30, 2023 at 4:37 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> From: Jason Xing <kernelxing@tencent.com>
>
> The previous commits did not handle the case where we need to abort
> sending an ACK that was triggered by the timer. Introduce a new flag,
> ICSK_ACK_COMP_TIMER, used only for sack compression, to satisfy this
> requirement.
>


Sorry, I cannot parse this changelog.

What is the problem you want to fix?


> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> ---
> Comment on this:
> This patch is based on top of commit [1]. I don't think the current
> patch fixes a bug; it is more of an improvement, so I would like to
> target the net-next tree.
>
> [1]: https://patchwork.kernel.org/project/netdevbpf/patch/20230529113804.GA20300@didi-ThinkCentre-M920t-N000/
> ---
>  include/net/inet_connection_sock.h | 3 ++-
>  net/ipv4/tcp_input.c               | 6 +++++-
>  net/ipv4/tcp_output.c              | 3 +++
>  net/ipv4/tcp_timer.c               | 5 ++++-
>  4 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> index c2b15f7e5516..34ff6d27471d 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
>         ICSK_ACK_TIMER  = 2,
>         ICSK_ACK_PUSHED = 4,
>         ICSK_ACK_PUSHED2 = 8,
> -       ICSK_ACK_NOW = 16       /* Send the next ACK immediately (once) */
> +       ICSK_ACK_NOW = 16,      /* Send the next ACK immediately (once) */
> +       ICSK_ACK_COMP_TIMER  = 32       /* Used for sack compression */
>  };
>
>  void inet_csk_init_xmit_timers(struct sock *sk,
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index cc072d2cfcd8..3980f77dcdff 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4540,6 +4540,9 @@ static void tcp_sack_compress_send_ack(struct sock *sk)
>         if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
>                 __sock_put(sk);
>
> +       /* This also handles the case where the sack compression was deferred */
> +       inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
> +
>         /* Since we have to send one ack finally,
>          * substract one from tp->compressed_ack to keep
>          * LINUX_MIB_TCPACKCOMPRESSED accurate.
> @@ -5555,7 +5558,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
>                 goto send_now;
>         }
>         tp->compressed_ack++;
> -       if (hrtimer_is_queued(&tp->compressed_ack_timer))
> +       if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER)
>                 return;

I do not see why you are adding a new bit, when the hrtimer already has one?

>
>         /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
> @@ -5568,6 +5571,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
>                       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
>                       rtt * (NSEC_PER_USEC >> 3)/20);
>         sock_hold(sk);
> +       inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_COMP_TIMER;
>         hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
>                                READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
>                                HRTIMER_MODE_REL_PINNED_SOFT);
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 02b58721ab6b..83840daad142 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -188,6 +188,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
>                 tp->compressed_ack = 0;
>                 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
>                         __sock_put(sk);
> +
> +               /* This also handles the case where the sack compression was deferred */
> +               inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;

This is adding a lot of code in the fast path.

>         }
>
>         if (unlikely(rcv_nxt != tp->rcv_nxt))
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index b3f8933b347c..a970336d1383 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -323,9 +323,12 @@ void tcp_compack_timer_handler(struct sock *sk)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>
> -       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
> +       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
> +           !(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER))
>                 return;
>
> +       inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
> +
>         if (tp->compressed_ack) {
>                 /* Since we have to send one ack finally,
>                  * subtract one from tp->compressed_ack to keep
> --
> 2.37.3
>

An ACK is an ACK; I do not see why you need to differentiate them.
Jason Xing May 30, 2023, 7:23 a.m. UTC | #2
On Tue, May 30, 2023 at 3:09 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Tue, May 30, 2023 at 4:37 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
> >
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > The previous commits did not handle the case where we need to abort
> > sending an ACK that was triggered by the timer. Introduce a new flag,
> > ICSK_ACK_COMP_TIMER, used only for sack compression, to satisfy this
> > requirement.
> >
>
>
> Sorry, I cannot parse this changelog.
>
> What is the problem you want to fix?

In the old logic, we cancel the timer in those two functions
(tcp_sack_compress_send_ack() and tcp_event_ack_sent()) in order to
avoid sending an ACK. But what if the hrtimer has already fired and
deferred sending the ACK to the release_cb process because the socket
was owned by the user? In that case there is no queued timer left to
cancel, yet the deferred ACK will still go out.

My intention is to abort sending that ACK if it was deferred in the
hrtimer handler.
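
To make the window concrete, here is a minimal sketch of the deferral
path, modeled on the upstream tcp_compressed_ack_kick() before the
refactor in [1]; the call into tcp_compack_timer_handler() and the
choice of deferred bit follow [1], so treat those identifiers as
assumptions rather than a literal excerpt:

/* Sketch: how the compressed-ack hrtimer defers its work when the
 * socket is owned by the user.
 */
static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
{
	struct tcp_sock *tp = container_of(timer, struct tcp_sock,
					   compressed_ack_timer);
	struct sock *sk = (struct sock *)tp;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_compack_timer_handler(sk);	/* send the compressed ACK now */
	} else {
		/* Socket owned by user: defer the work to tcp_release_cb().
		 * From this point on, the hrtimer is no longer queued, so a
		 * later hrtimer_try_to_cancel() in tcp_sack_compress_send_ack()
		 * or tcp_event_ack_sent() cannot stop the deferred ACK -- this
		 * is the window ICSK_ACK_COMP_TIMER is meant to close.
		 */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
				      &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);

	sock_put(sk);
	return HRTIMER_NORESTART;
}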

>
>
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > ---
> > Comment on this:
> > This patch is based on top of commit [1]. I don't think the current
> > patch fixes a bug; it is more of an improvement, so I would like to
> > target the net-next tree.
> >
> > [1]: https://patchwork.kernel.org/project/netdevbpf/patch/20230529113804.GA20300@didi-ThinkCentre-M920t-N000/
> > ---
> >  include/net/inet_connection_sock.h | 3 ++-
> >  net/ipv4/tcp_input.c               | 6 +++++-
> >  net/ipv4/tcp_output.c              | 3 +++
> >  net/ipv4/tcp_timer.c               | 5 ++++-
> >  4 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> > index c2b15f7e5516..34ff6d27471d 100644
> > --- a/include/net/inet_connection_sock.h
> > +++ b/include/net/inet_connection_sock.h
> > @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
> >         ICSK_ACK_TIMER  = 2,
> >         ICSK_ACK_PUSHED = 4,
> >         ICSK_ACK_PUSHED2 = 8,
> > -       ICSK_ACK_NOW = 16       /* Send the next ACK immediately (once) */
> > +       ICSK_ACK_NOW = 16,      /* Send the next ACK immediately (once) */
> > +       ICSK_ACK_COMP_TIMER  = 32       /* Used for sack compression */
> >  };
> >
> >  void inet_csk_init_xmit_timers(struct sock *sk,
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index cc072d2cfcd8..3980f77dcdff 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -4540,6 +4540,9 @@ static void tcp_sack_compress_send_ack(struct sock *sk)
> >         if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
> >                 __sock_put(sk);
> >
> > +       /* This also handles the case where the sack compression was deferred */
> > +       inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
> > +
> >         /* Since we have to send one ack finally,
> >          * substract one from tp->compressed_ack to keep
> >          * LINUX_MIB_TCPACKCOMPRESSED accurate.
> > @@ -5555,7 +5558,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
> >                 goto send_now;
> >         }
> >         tp->compressed_ack++;
> > -       if (hrtimer_is_queued(&tp->compressed_ack_timer))
> > +       if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER)
> >                 return;
>
> I do not see why you are adding a new bit, when the hrtimer already has one?

As I mentioned above, testing whether the hrtimer is queued cannot
tell us whether the handler has already fired and deferred the ACK.
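
A rough timeline of the window (assuming the deferral path sketched
above):

/* Why hrtimer_is_queued() is not a reliable test here:
 *
 *   process context                     hrtimer (softirq)
 *   ---------------                     -----------------
 *   lock_sock(sk)
 *                                       compressed_ack_timer fires:
 *                                         socket owned -> defer to
 *                                         tcp_release_cb(); the timer
 *                                         is now *not* queued
 *   tcp_sack_compress_send_ack():
 *     hrtimer_try_to_cancel() == 0      nothing to cancel, but the
 *                                       deferred ACK is still pending
 *   release_sock(sk)
 *                                       tcp_release_cb() sends the ACK
 *                                       we meant to cancel
 */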

>
> >
> >         /* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
> > @@ -5568,6 +5571,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
> >                       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
> >                       rtt * (NSEC_PER_USEC >> 3)/20);
> >         sock_hold(sk);
> > +       inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_COMP_TIMER;
> >         hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
> >                                READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
> >                                HRTIMER_MODE_REL_PINNED_SOFT);
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index 02b58721ab6b..83840daad142 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -188,6 +188,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
> >                 tp->compressed_ack = 0;
> >                 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
> >                         __sock_put(sk);
> > +
> > +               /* This also handles the case where the sack compression was deferred */
> > +               inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
>
> This is adding a lot of code in the fast path.
>
> >         }
> >
> >         if (unlikely(rcv_nxt != tp->rcv_nxt))
> > diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> > index b3f8933b347c..a970336d1383 100644
> > --- a/net/ipv4/tcp_timer.c
> > +++ b/net/ipv4/tcp_timer.c
> > @@ -323,9 +323,12 @@ void tcp_compack_timer_handler(struct sock *sk)
> >  {
> >         struct tcp_sock *tp = tcp_sk(sk);
> >
> > -       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
> > +       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
> > +           !(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER))
> >                 return;
> >
> > +       inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
> > +
> >         if (tp->compressed_ack) {
> >                 /* Since we have to send one ack finally,
> >                  * subtract one from tp->compressed_ack to keep
> > --
> > 2.37.3
> >
>
> An ACK is an ACK; I do not see why you need to differentiate them.

Well, actually I was a little confused about whether to reuse the
ICSK_ACK_TIMER flag for sack compression, so I added a new flag to
differentiate them :(

Thanks for your reply.

Jason
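
For reference, the lifecycle of the proposed flag, condensed from the
patch below (a summary sketch, not a literal excerpt):

/* ICSK_ACK_COMP_TIMER lifecycle as proposed:
 *
 * arm (__tcp_ack_snd_check):
 *	icsk->icsk_ack.pending |= ICSK_ACK_COMP_TIMER;
 *	hrtimer_start_range_ns(&tp->compressed_ack_timer, ...);
 *
 * fire (tcp_compack_timer_handler, direct or via tcp_release_cb):
 *	if (!(icsk->icsk_ack.pending & ICSK_ACK_COMP_TIMER))
 *		return;		// the ACK was canceled in the meantime
 *	icsk->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
 *	// ... send the compressed ACK ...
 *
 * cancel (tcp_sack_compress_send_ack / tcp_event_ack_sent):
 *	hrtimer_try_to_cancel(&tp->compressed_ack_timer);
 *	icsk->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
 */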

Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..34ff6d27471d 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
 	ICSK_ACK_TIMER  = 2,
 	ICSK_ACK_PUSHED = 4,
 	ICSK_ACK_PUSHED2 = 8,
-	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
+	ICSK_ACK_NOW = 16,	/* Send the next ACK immediately (once) */
+	ICSK_ACK_COMP_TIMER  = 32	/* Used for sack compression */
 };
 
 void inet_csk_init_xmit_timers(struct sock *sk,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc072d2cfcd8..3980f77dcdff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4540,6 +4540,9 @@ static void tcp_sack_compress_send_ack(struct sock *sk)
 	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 		__sock_put(sk);
 
+	/* This also handles the case where the sack compression was deferred */
+	inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
+
 	/* Since we have to send one ack finally,
 	 * substract one from tp->compressed_ack to keep
 	 * LINUX_MIB_TCPACKCOMPRESSED accurate.
@@ -5555,7 +5558,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 		goto send_now;
 	}
 	tp->compressed_ack++;
-	if (hrtimer_is_queued(&tp->compressed_ack_timer))
+	if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER)
 		return;
 
 	/* compress ack timer : 5 % of rtt, but no more than tcp_comp_sack_delay_ns */
@@ -5568,6 +5571,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
+	inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_COMP_TIMER;
 	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
 			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
 			       HRTIMER_MODE_REL_PINNED_SOFT);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 02b58721ab6b..83840daad142 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -188,6 +188,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 		tp->compressed_ack = 0;
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
+
+		/* This also handles the case where the sack compression was deferred */
+		inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
 	}
 
 	if (unlikely(rcv_nxt != tp->rcv_nxt))
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b3f8933b347c..a970336d1383 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -323,9 +323,12 @@ void tcp_compack_timer_handler(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_COMP_TIMER))
 		return;
 
+	inet_csk(sk)->icsk_ack.pending &= ~ICSK_ACK_COMP_TIMER;
+
 	if (tp->compressed_ack) {
 		/* Since we have to send one ack finally,
 		 * subtract one from tp->compressed_ack to keep