diff mbox series

[net-next,1/6] tcp: set TCP_SYNCNT locklessly

Message ID 20230804144616.3938718-2-edumazet@google.com (mailing list archive)
State Accepted
Commit d44fd4a767b3755899f8ad1df3e8eca3961ba708
Delegated to: Netdev Maintainers
Headers show
Series tcp: set few options locklessly | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1330 this patch: 1330
netdev/cc_maintainers warning 1 maintainers not CCed: dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 1351 this patch: 1351
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1353 this patch: 1353
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 55 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Aug. 4, 2023, 2:46 p.m. UTC
icsk->icsk_syn_retries can safely be set without locking the socket.

We have to add READ_ONCE() annotations in tcp_fastopen_synack_timer()
and tcp_write_timeout().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp.c       | 15 ++++++---------
 net/ipv4/tcp_timer.c |  9 ++++++---
 2 files changed, 12 insertions(+), 12 deletions(-)

Comments

Soheil Hassas Yeganeh Aug. 4, 2023, 2:55 p.m. UTC | #1
On Fri, Aug 4, 2023 at 10:46 AM Eric Dumazet <edumazet@google.com> wrote:
>
> icsk->icsk_syn_retries can safely be set without locking the socket.
>
> We have to add READ_ONCE() annotations in tcp_fastopen_synack_timer()
> and tcp_write_timeout().
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

> ---
>  net/ipv4/tcp.c       | 15 ++++++---------
>  net/ipv4/tcp_timer.c |  9 ++++++---
>  2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index aca5620cf3ba20be38d81b1b526c22623b145ff7..bcbb33a8c152abe2a060abd644689b54bcca1daa 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -3291,9 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
>         if (val < 1 || val > MAX_TCP_SYNCNT)
>                 return -EINVAL;
>
> -       lock_sock(sk);
>         WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
> -       release_sock(sk);
>         return 0;
>  }
>  EXPORT_SYMBOL(tcp_sock_set_syncnt);
> @@ -3462,6 +3460,12 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
>         if (copy_from_sockptr(&val, optval, sizeof(val)))
>                 return -EFAULT;
>
> +       /* Handle options that can be set without locking the socket. */
> +       switch (optname) {
> +       case TCP_SYNCNT:
> +               return tcp_sock_set_syncnt(sk, val);
> +       }
> +
>         sockopt_lock_sock(sk);
>
>         switch (optname) {
> @@ -3569,13 +3573,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
>                 else
>                         WRITE_ONCE(tp->keepalive_probes, val);
>                 break;
> -       case TCP_SYNCNT:
> -               if (val < 1 || val > MAX_TCP_SYNCNT)
> -                       err = -EINVAL;
> -               else
> -                       WRITE_ONCE(icsk->icsk_syn_retries, val);
> -               break;
> -
>         case TCP_SAVE_SYN:
>                 /* 0: disable, 1: enable, 2: start from ether_header */
>                 if (val < 0 || val > 2)
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index 470f581eedd438b3bbd6ae4973c7a6f01ee1724f..66040ab457d46ffa2fac62f875b636f567157793 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -239,7 +239,8 @@ static int tcp_write_timeout(struct sock *sk)
>         if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
>                 if (icsk->icsk_retransmits)
>                         __dst_negative_advice(sk);
> -               retry_until = icsk->icsk_syn_retries ? :
> +               /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
> +               retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
>                         READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
>
>                 max_retransmits = retry_until;
> @@ -421,8 +422,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
>
>         req->rsk_ops->syn_ack_timeout(req);
>
> -       /* add one more retry for fastopen */
> -       max_retries = icsk->icsk_syn_retries ? :
> +       /* Add one more retry for fastopen.
> +        * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
> +        */
> +       max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
>                 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
>
>         if (req->num_timeout >= max_retries) {
> --
> 2.41.0.640.ga95def55d0-goog
>
diff mbox series

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index aca5620cf3ba20be38d81b1b526c22623b145ff7..bcbb33a8c152abe2a060abd644689b54bcca1daa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3291,9 +3291,7 @@  int tcp_sock_set_syncnt(struct sock *sk, int val)
 	if (val < 1 || val > MAX_TCP_SYNCNT)
 		return -EINVAL;
 
-	lock_sock(sk);
 	WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
-	release_sock(sk);
 	return 0;
 }
 EXPORT_SYMBOL(tcp_sock_set_syncnt);
@@ -3462,6 +3460,12 @@  int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;
 
+	/* Handle options that can be set without locking the socket. */
+	switch (optname) {
+	case TCP_SYNCNT:
+		return tcp_sock_set_syncnt(sk, val);
+	}
+
 	sockopt_lock_sock(sk);
 
 	switch (optname) {
@@ -3569,13 +3573,6 @@  int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		else
 			WRITE_ONCE(tp->keepalive_probes, val);
 		break;
-	case TCP_SYNCNT:
-		if (val < 1 || val > MAX_TCP_SYNCNT)
-			err = -EINVAL;
-		else
-			WRITE_ONCE(icsk->icsk_syn_retries, val);
-		break;
-
 	case TCP_SAVE_SYN:
 		/* 0: disable, 1: enable, 2: start from ether_header */
 		if (val < 0 || val > 2)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 470f581eedd438b3bbd6ae4973c7a6f01ee1724f..66040ab457d46ffa2fac62f875b636f567157793 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -239,7 +239,8 @@  static int tcp_write_timeout(struct sock *sk)
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
 			__dst_negative_advice(sk);
-		retry_until = icsk->icsk_syn_retries ? :
+		/* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
+		retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
 			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
 
 		max_retransmits = retry_until;
@@ -421,8 +422,10 @@  static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 
 	req->rsk_ops->syn_ack_timeout(req);
 
-	/* add one more retry for fastopen */
-	max_retries = icsk->icsk_syn_retries ? :
+	/* Add one more retry for fastopen.
+	 * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
+	 */
+	max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
 		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
 
 	if (req->num_timeout >= max_retries) {