diff mbox series

[net-next,1/3] net: tcp: send zero-window ACK when no memory

Message ID 20230727125125.1194376-2-imagedong@tencent.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: tcp: support probing OOM | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3391 this patch: 3391
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 1874 this patch: 1874
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3559 this patch: 3559
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 47 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Menglong Dong July 27, 2023, 12:51 p.m. UTC
From: Menglong Dong <imagedong@tencent.com>

For now, skb will be dropped when no memory, which makes client keep
retrans util timeout and it's not friendly to the users.

In this patch, we reply an ACK with zero-window in this case to update
the snd_wnd of the sender to 0. Therefore, the sender won't timeout the
connection and will probe the zero-window with the retransmits.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
---
 include/net/inet_connection_sock.h |  3 ++-
 net/ipv4/tcp_input.c               |  4 ++--
 net/ipv4/tcp_output.c              | 14 +++++++++++---
 3 files changed, 15 insertions(+), 6 deletions(-)

Comments

Eric Dumazet July 27, 2023, 7:17 p.m. UTC | #1
On Thu, Jul 27, 2023 at 2:51 PM <menglong8.dong@gmail.com> wrote:
>
> From: Menglong Dong <imagedong@tencent.com>
>
> For now, skb will be dropped when no memory, which makes client keep
> retrans util timeout and it's not friendly to the users.
>
> In this patch, we reply an ACK with zero-window in this case to update
> the snd_wnd of the sender to 0. Therefore, the sender won't timeout the
> connection and will probe the zero-window with the retransmits.
>
> Signed-off-by: Menglong Dong <imagedong@tencent.com>
> ---
>  include/net/inet_connection_sock.h |  3 ++-
>  net/ipv4/tcp_input.c               |  4 ++--
>  net/ipv4/tcp_output.c              | 14 +++++++++++---
>  3 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> index c2b15f7e5516..be3c858a2ebb 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
>         ICSK_ACK_TIMER  = 2,
>         ICSK_ACK_PUSHED = 4,
>         ICSK_ACK_PUSHED2 = 8,
> -       ICSK_ACK_NOW = 16       /* Send the next ACK immediately (once) */
> +       ICSK_ACK_NOW = 16,      /* Send the next ACK immediately (once) */
> +       ICSK_ACK_NOMEM = 32,
>  };
>
>  void inet_csk_init_xmit_timers(struct sock *sk,
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 3cd92035e090..03111af6115d 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -5061,7 +5061,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>                         reason = SKB_DROP_REASON_PROTO_MEM;
>                         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
>                         sk->sk_data_ready(sk);
> -                       goto drop;
> +                       inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM;

Also set ICSK_ACK_NOW ?

We also need to make sure to send an immediate ACK WIN 0 in the case we queued
the skb in an empty receive queue and we were under pressure.

We do not want to have a delayed ACK sending a normal RWIN,
then wait for another packet that we will probably drop.

Look at the code :

if (skb_queue_len(&sk->sk_receive_queue) == 0)
     sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {

and refactor it to make sure to set  ICSK_ACK_NOMEM even on the first packet
that bypassed the rmem_schedule().



> +                       goto out_of_window;

Why forcing quickack mode ? Please leave the "goto drop;"

>                 }
>
>                 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
> @@ -5102,7 +5103,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>  out_of_window:
>                 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
>                 inet_csk_schedule_ack(sk);
> -drop:
>                 tcp_drop_reason(sk, skb, reason);
>                 return;
>         }
>
Menglong Dong July 28, 2023, 2:37 a.m. UTC | #2
On Fri, Jul 28, 2023 at 3:17 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Jul 27, 2023 at 2:51 PM <menglong8.dong@gmail.com> wrote:
> >
> > From: Menglong Dong <imagedong@tencent.com>
> >
> > For now, skb will be dropped when no memory, which makes client keep
> > retrans util timeout and it's not friendly to the users.
> >
> > In this patch, we reply an ACK with zero-window in this case to update
> > the snd_wnd of the sender to 0. Therefore, the sender won't timeout the
> > connection and will probe the zero-window with the retransmits.
> >
> > Signed-off-by: Menglong Dong <imagedong@tencent.com>
> > ---
> >  include/net/inet_connection_sock.h |  3 ++-
> >  net/ipv4/tcp_input.c               |  4 ++--
> >  net/ipv4/tcp_output.c              | 14 +++++++++++---
> >  3 files changed, 15 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> > index c2b15f7e5516..be3c858a2ebb 100644
> > --- a/include/net/inet_connection_sock.h
> > +++ b/include/net/inet_connection_sock.h
> > @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
> >         ICSK_ACK_TIMER  = 2,
> >         ICSK_ACK_PUSHED = 4,
> >         ICSK_ACK_PUSHED2 = 8,
> > -       ICSK_ACK_NOW = 16       /* Send the next ACK immediately (once) */
> > +       ICSK_ACK_NOW = 16,      /* Send the next ACK immediately (once) */
> > +       ICSK_ACK_NOMEM = 32,
> >  };
> >
> >  void inet_csk_init_xmit_timers(struct sock *sk,
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 3cd92035e090..03111af6115d 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -5061,7 +5061,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
> >                         reason = SKB_DROP_REASON_PROTO_MEM;
> >                         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
> >                         sk->sk_data_ready(sk);
> > -                       goto drop;
> > +                       inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM;
>
> Also set ICSK_ACK_NOW ?
>
> We also need to make sure to send an immediate ACK WIN 0 in the case we queued
> the skb in an empty receive queue and we were under pressure.
>
> We do not want to have a delayed ACK sending a normal RWIN,
> then wait for another packet that we will probably drop.
>
> Look at the code :
>
> if (skb_queue_len(&sk->sk_receive_queue) == 0)
>      sk_forced_mem_schedule(sk, skb->truesize);
> else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
>
> and refactor it to make sure to set  ICSK_ACK_NOMEM even on the first packet
> that bypassed the rmem_schedule().
>

Ok, that makes sense.

>
>
> > +                       goto out_of_window;
>
> Why forcing quickack mode ? Please leave the "goto drop;"
>

Hmm...because I want to make it send an immediate
ACK. Obviously, a ICSK_ACK_NOW flag should be
the better choice here.

> >                 }
> >
> >                 eaten = tcp_queue_rcv(sk, skb, &fragstolen);
> > @@ -5102,7 +5103,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
> >  out_of_window:
> >                 tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
> >                 inet_csk_schedule_ack(sk);
> > -drop:
> >                 tcp_drop_reason(sk, skb, reason);
> >                 return;
> >         }
> >
diff mbox series

Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..be3c858a2ebb 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -164,7 +164,8 @@  enum inet_csk_ack_state_t {
 	ICSK_ACK_TIMER  = 2,
 	ICSK_ACK_PUSHED = 4,
 	ICSK_ACK_PUSHED2 = 8,
-	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
+	ICSK_ACK_NOW = 16,	/* Send the next ACK immediately (once) */
+	ICSK_ACK_NOMEM = 32,
 };
 
 void inet_csk_init_xmit_timers(struct sock *sk,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3cd92035e090..03111af6115d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5061,7 +5061,8 @@  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			reason = SKB_DROP_REASON_PROTO_MEM;
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 			sk->sk_data_ready(sk);
-			goto drop;
+			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM;
+			goto out_of_window;
 		}
 
 		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5102,7 +5103,6 @@  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 out_of_window:
 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
-drop:
 		tcp_drop_reason(sk, skb, reason);
 		return;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2cb39b6dad02..81aa2c615924 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -257,11 +257,19 @@  EXPORT_SYMBOL(tcp_select_initial_window);
 static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 old_win = tp->rcv_wnd;
-	u32 cur_win = tcp_receive_window(tp);
-	u32 new_win = __tcp_select_window(sk);
 	struct net *net = sock_net(sk);
+	u32 old_win = tp->rcv_wnd;
+	u32 cur_win, new_win;
+
+	/* Make the window 0 if we failed to queue the data because we
+	 * are out of memory. The window is temporary, so we don't store
+	 * it on the socket.
+	 */
+	if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+		return 0;
 
+	cur_win = tcp_receive_window(tp);
+	new_win = __tcp_select_window(sk);
 	if (new_win < cur_win) {
 		/* Danger Will Robinson!
 		 * Don't update rcv_wup/rcv_wnd here or else