diff mbox series

[net-next,01/14] ipv6: lockless IPV6_UNICAST_HOPS implementation

Message ID 20230912160212.3467976-2-edumazet@google.com (mailing list archive)
State Accepted
Commit b0adfba7ee770fef20b1b6d86706c28f7fccfb07
Delegated to: Netdev Maintainers
Headers show
Series ipv6: round of data-races fixes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3184 this patch: 3184
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 1550 this patch: 1550
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3432 this patch: 3432
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Sept. 12, 2023, 4:01 p.m. UTC
Some np->hop_limit accesses are racy when the socket lock is not held.

Add the missing READ_ONCE()/WRITE_ONCE() annotations and switch to a fully lockless implementation.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/ipv6.h     | 12 +-----------
 include/net/ipv6.h       |  2 +-
 net/ipv6/ip6_output.c    |  2 +-
 net/ipv6/ipv6_sockglue.c | 20 +++++++++++---------
 net/ipv6/mcast.c         |  2 +-
 net/ipv6/ndisc.c         |  2 +-
 6 files changed, 16 insertions(+), 24 deletions(-)

Comments

David Ahern Sept. 14, 2023, 2:51 p.m. UTC | #1
On 9/12/23 10:01 AM, Eric Dumazet wrote:
> Some np->hop_limit accesses are racy, when socket lock is not held.
> 
> Add missing annotations and switch to full lockless implementation.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/linux/ipv6.h     | 12 +-----------
>  include/net/ipv6.h       |  2 +-
>  net/ipv6/ip6_output.c    |  2 +-
>  net/ipv6/ipv6_sockglue.c | 20 +++++++++++---------
>  net/ipv6/mcast.c         |  2 +-
>  net/ipv6/ndisc.c         |  2 +-
>  6 files changed, 16 insertions(+), 24 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>
diff mbox series

Patch

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index af8a771a053c51eed297516f927a5fd003315ef4..c2e0870713849fbbf1a8ec2d60cca80caab0cb98 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -213,17 +213,7 @@  struct ipv6_pinfo {
 	__be32			flow_label;
 	__u32			frag_size;
 
-	/*
-	 * Packed in 16bits.
-	 * Omit one shift by putting the signed field at MSB.
-	 */
-#if defined(__BIG_ENDIAN_BITFIELD)
-	__s16			hop_limit:9;
-	__u16			__unused_1:7;
-#else
-	__u16			__unused_1:7;
-	__s16			hop_limit:9;
-#endif
+	s16			hop_limit;
 
 #if defined(__BIG_ENDIAN_BITFIELD)
 	/* Packed in 16bits. */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 0675be0f3fa0efc55575bb5b2569dc8a1dbb9f24..61007db0036482e27121747add0eec77f912b54a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -911,7 +911,7 @@  static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	if (ipv6_addr_is_multicast(&fl6->daddr))
 		hlimit = np->mcast_hops;
 	else
-		hlimit = np->hop_limit;
+		hlimit = READ_ONCE(np->hop_limit);
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 	return hlimit;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c545f2ca625d6b0e09f2bb8e6d513..1e16d56d8c38ac51bd999038ae4e8478bf2f5f8c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -309,7 +309,7 @@  int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	 *	Fill in the IPv6 header
 	 */
 	if (np)
-		hlimit = np->hop_limit;
+		hlimit = READ_ONCE(np->hop_limit);
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387f0f6f50211b89f92ac1e00a0b07a..f27993a1470dddd876f34f65c1f171c576eca272 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -415,6 +415,16 @@  int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	if (ip6_mroute_opt(optname))
 		return ip6_mroute_setsockopt(sk, optname, optval, optlen);
 
+	/* Handle options that can be set without locking the socket. */
+	switch (optname) {
+	case IPV6_UNICAST_HOPS:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val > 255 || val < -1)
+			return -EINVAL;
+		WRITE_ONCE(np->hop_limit, val);
+		return 0;
+	}
 	if (needs_rtnl)
 		rtnl_lock();
 	sockopt_lock_sock(sk);
@@ -733,14 +743,6 @@  int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		}
 		break;
 	}
-	case IPV6_UNICAST_HOPS:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val > 255 || val < -1)
-			goto e_inval;
-		np->hop_limit = val;
-		retv = 0;
-		break;
 
 	case IPV6_MULTICAST_HOPS:
 		if (sk->sk_type == SOCK_STREAM)
@@ -1347,7 +1349,7 @@  int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		struct dst_entry *dst;
 
 		if (optname == IPV6_UNICAST_HOPS)
-			val = np->hop_limit;
+			val = READ_ONCE(np->hop_limit);
 		else
 			val = np->mcast_hops;
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974de97f26635d0d3d54695af3070a7..6a33a50687bcf7201e75574f03e619fe89636068 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1716,7 +1716,7 @@  static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
 
 	hdr->payload_len = htons(len);
 	hdr->nexthdr = proto;
-	hdr->hop_limit = inet6_sk(sk)->hop_limit;
+	hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
 
 	hdr->saddr = *saddr;
 	hdr->daddr = *daddr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7a37d7858393ab6a30616ab13a3bf..b554fd40bdc3787eb3bafa1d9923076d6078217e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,7 +500,7 @@  void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
 					      csum_partial(icmp6h,
 							   skb->len, 0));
 
-	ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+	ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dst->dev);