Message ID | 20230804144616.3938718-7-edumazet@google.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 6e97ba552b8d3dd074a28b8600740b8bed42267b |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: set few options locklessly | expand |
On Fri, Aug 4, 2023 at 10:46 AM Eric Dumazet <edumazet@google.com> wrote: > > rskq_defer_accept field can be read/written without > the need of holding the socket lock. > > Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Very nice series! Thank you! I double-checked every field and they are all READ_ONCE/WRITE_ONCE paired. > --- > net/ipv4/tcp.c | 13 ++++++------- > net/ipv4/tcp_input.c | 2 +- > net/ipv4/tcp_minisocks.c | 2 +- > 3 files changed, 8 insertions(+), 9 deletions(-) > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index 5c71b4fe11d1c34456976d60eb8742641111dd62..4fbc7ff8c53c05cbef3d108527239c7ec8c1363e 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -3479,6 +3479,12 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, > else > WRITE_ONCE(tp->linger2, val * HZ); > return 0; > + case TCP_DEFER_ACCEPT: > + /* Translate value in seconds to number of retransmits */ > + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, > + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, > + TCP_RTO_MAX / HZ)); > + return 0; > } > > sockopt_lock_sock(sk); > @@ -3584,13 +3590,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, > tp->save_syn = val; > break; > > - case TCP_DEFER_ACCEPT: > - /* Translate value in seconds to number of retransmits */ > - WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, > - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, > - TCP_RTO_MAX / HZ)); > - break; > - > case TCP_WINDOW_CLAMP: > err = tcp_set_window_clamp(sk, val); > break; > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index f445f5a7c0ebf5f7ab2b2402357f3749d954c0e8..972c3b16369589293eb15febe52e72d5c596b032 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -6325,7 +6325,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, > if (fastopen_fail) > return -1; > if (sk->sk_write_pending || > - icsk->icsk_accept_queue.rskq_defer_accept || > + 
READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) || > inet_csk_in_pingpong_mode(sk)) { > /* Save one ACK. Data will be ready after > * several ticks, if write_pending is set. > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index c8f2aa0033871ed3f8b6b045c2cbca6e88bf2b61..32a70e3530db3247986ab5cb08c8a46babf86ad6 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -794,7 +794,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, > return sk; > > /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ > - if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && > + if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) && > TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { > inet_rsk(req)->acked = 1; > __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); > -- > 2.41.0.640.ga95def55d0-goog >
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c71b4fe11d1c34456976d60eb8742641111dd62..4fbc7ff8c53c05cbef3d108527239c7ec8c1363e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3479,6 +3479,12 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, else WRITE_ONCE(tp->linger2, val * HZ); return 0; + case TCP_DEFER_ACCEPT: + /* Translate value in seconds to number of retransmits */ + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ)); + return 0; } sockopt_lock_sock(sk); @@ -3584,13 +3590,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, tp->save_syn = val; break; - case TCP_DEFER_ACCEPT: - /* Translate value in seconds to number of retransmits */ - WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, - TCP_RTO_MAX / HZ)); - break; - case TCP_WINDOW_CLAMP: err = tcp_set_window_clamp(sk, val); break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f445f5a7c0ebf5f7ab2b2402357f3749d954c0e8..972c3b16369589293eb15febe52e72d5c596b032 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6325,7 +6325,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (fastopen_fail) return -1; if (sk->sk_write_pending || - icsk->icsk_accept_queue.rskq_defer_accept || + READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) || inet_csk_in_pingpong_mode(sk)) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c8f2aa0033871ed3f8b6b045c2cbca6e88bf2b61..32a70e3530db3247986ab5cb08c8a46babf86ad6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -794,7 +794,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return sk; /* While TCP_DEFER_ACCEPT is active, drop bare ACK. 
*/ - if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
rskq_defer_accept field can be read/written without the need of holding the socket lock. Signed-off-by: Eric Dumazet <edumazet@google.com> --- net/ipv4/tcp.c | 13 ++++++------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-)