Message ID | 20230727125125.1194376-2-imagedong@tencent.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: tcp: support probing OOM | expand |
On Thu, Jul 27, 2023 at 2:51 PM <menglong8.dong@gmail.com> wrote: > > From: Menglong Dong <imagedong@tencent.com> > > For now, skb will be dropped when no memory, which makes client keep > retrans util timeout and it's not friendly to the users. > > In this patch, we reply an ACK with zero-window in this case to update > the snd_wnd of the sender to 0. Therefore, the sender won't timeout the > connection and will probe the zero-window with the retransmits. > > Signed-off-by: Menglong Dong <imagedong@tencent.com> > --- > include/net/inet_connection_sock.h | 3 ++- > net/ipv4/tcp_input.c | 4 ++-- > net/ipv4/tcp_output.c | 14 +++++++++++--- > 3 files changed, 15 insertions(+), 6 deletions(-) > > diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h > index c2b15f7e5516..be3c858a2ebb 100644 > --- a/include/net/inet_connection_sock.h > +++ b/include/net/inet_connection_sock.h > @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t { > ICSK_ACK_TIMER = 2, > ICSK_ACK_PUSHED = 4, > ICSK_ACK_PUSHED2 = 8, > - ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ > + ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */ > + ICSK_ACK_NOMEM = 32, > }; > > void inet_csk_init_xmit_timers(struct sock *sk, > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index 3cd92035e090..03111af6115d 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -5061,7 +5061,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > reason = SKB_DROP_REASON_PROTO_MEM; > NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); > sk->sk_data_ready(sk); > - goto drop; > + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM; Also set ICSK_ACK_NOW ? We also need to make sure to send an immediate ACK WIN 0 in the case we queued the skb in an empty receive queue and we were under pressure. We do not want to have a delayed ACK sending a normal RWIN, then wait for another packet that we will probably drop. Look at the code : if (skb_queue_len(&sk->sk_receive_queue) == 0) sk_forced_mem_schedule(sk, skb->truesize); else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { and refactor it to make sure to set ICSK_ACK_NOMEM even on the first packet that bypassed the rmem_schedule(). > + goto out_of_window; Why forcing quickack mode ? Please leave the "goto drop;" > } > > eaten = tcp_queue_rcv(sk, skb, &fragstolen); > @@ -5102,7 +5103,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > out_of_window: > tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); > inet_csk_schedule_ack(sk); > -drop: > tcp_drop_reason(sk, skb, reason); > return; > } >
On Fri, Jul 28, 2023 at 3:17 AM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Jul 27, 2023 at 2:51 PM <menglong8.dong@gmail.com> wrote: > > > > From: Menglong Dong <imagedong@tencent.com> > > > > For now, skb will be dropped when no memory, which makes client keep > > retrans util timeout and it's not friendly to the users. > > > > In this patch, we reply an ACK with zero-window in this case to update > > the snd_wnd of the sender to 0. Therefore, the sender won't timeout the > > connection and will probe the zero-window with the retransmits. > > > > Signed-off-by: Menglong Dong <imagedong@tencent.com> > > --- > > include/net/inet_connection_sock.h | 3 ++- > > net/ipv4/tcp_input.c | 4 ++-- > > net/ipv4/tcp_output.c | 14 +++++++++++--- > > 3 files changed, 15 insertions(+), 6 deletions(-) > > > > diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h > > index c2b15f7e5516..be3c858a2ebb 100644 > > --- a/include/net/inet_connection_sock.h > > +++ b/include/net/inet_connection_sock.h > > @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t { > > ICSK_ACK_TIMER = 2, > > ICSK_ACK_PUSHED = 4, > > ICSK_ACK_PUSHED2 = 8, > > - ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ > > + ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */ > > + ICSK_ACK_NOMEM = 32, > > }; > > > > void inet_csk_init_xmit_timers(struct sock *sk, > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > > index 3cd92035e090..03111af6115d 100644 > > --- a/net/ipv4/tcp_input.c > > +++ b/net/ipv4/tcp_input.c > > @@ -5061,7 +5061,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > > reason = SKB_DROP_REASON_PROTO_MEM; > > NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); > > sk->sk_data_ready(sk); > > - goto drop; > > + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM; > > Also set ICSK_ACK_NOW ? > > We also need to make sure to send an immediate ACK WIN 0 in the case we queued > the skb in an empty receive queue and we were under pressure. > > We do not want to have a delayed ACK sending a normal RWIN, > then wait for another packet that we will probably drop. > > Look at the code : > > if (skb_queue_len(&sk->sk_receive_queue) == 0) > sk_forced_mem_schedule(sk, skb->truesize); > else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { > > and refactor it to make sure to set ICSK_ACK_NOMEM even on the first packet > that bypassed the rmem_schedule(). > Ok, that makes sense. > > > > + goto out_of_window; > > Why forcing quickack mode ? Please leave the "goto drop;" > Hmm...because I want to make it send an immediate ACK. Obviously, a ICSK_ACK_NOW flag should be the better choice here. > > } > > > > eaten = tcp_queue_rcv(sk, skb, &fragstolen); > > @@ -5102,7 +5103,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > > out_of_window: > > tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); > > inet_csk_schedule_ack(sk); > > -drop: > > tcp_drop_reason(sk, skb, reason); > > return; > > } > >
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c2b15f7e5516..be3c858a2ebb 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -164,7 +164,8 @@ enum inet_csk_ack_state_t { ICSK_ACK_TIMER = 2, ICSK_ACK_PUSHED = 4, ICSK_ACK_PUSHED2 = 8, - ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ + ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */ + ICSK_ACK_NOMEM = 32, }; void inet_csk_init_xmit_timers(struct sock *sk, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3cd92035e090..03111af6115d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5061,7 +5061,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) reason = SKB_DROP_REASON_PROTO_MEM; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); sk->sk_data_ready(sk); - goto drop; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOMEM; + goto out_of_window; } eaten = tcp_queue_rcv(sk, skb, &fragstolen); @@ -5102,7 +5103,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) out_of_window: tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); inet_csk_schedule_ack(sk); -drop: tcp_drop_reason(sk, skb, reason); return; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2cb39b6dad02..81aa2c615924 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -257,11 +257,19 @@ EXPORT_SYMBOL(tcp_select_initial_window); static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 old_win = tp->rcv_wnd; - u32 cur_win = tcp_receive_window(tp); - u32 new_win = __tcp_select_window(sk); struct net *net = sock_net(sk); + u32 old_win = tp->rcv_wnd; + u32 cur_win, new_win; + + /* Make the window 0 if we failed to queue the data because we + * are out of memory. The window is temporary, so we don't store + * it on the socket. + */ + if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) + return 0; + cur_win = tcp_receive_window(tp); + new_win = __tcp_select_window(sk); if (new_win < cur_win) { /* Danger Will Robinson! * Don't update rcv_wup/rcv_wnd here or else