diff mbox series

[net] Revert "tcp/dccp: get rid of inet_twsk_purge()"

Message ID 20220512211456.2680273-1-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Commit 04c494e68a1340cb5c70d4704ac32d863dc64293
Delegated to: Netdev Maintainers
Headers show
Series [net] Revert "tcp/dccp: get rid of inet_twsk_purge()" | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2341 this patch: 2341
netdev/cc_maintainers warning 5 maintainers not CCed: yejune.deng@gmail.com yoshfuji@linux-ipv6.org dsahern@kernel.org aahringo@redhat.com dccp@vger.kernel.org
netdev/build_clang success Errors and warnings before: 587 this patch: 587
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 2466 this patch: 2466
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Unnecessary parentheses around 'tw->tw_family != family'
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet May 12, 2022, 9:14 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

This reverts commits:

0dad4087a86a2cbe177404dc73f18ada26a2c390 ("tcp/dccp: get rid of inet_twsk_purge()")
d507204d3c5cc57d9a8bdf0a477615bb59ea1611 ("tcp/dccp: add tw->tw_bslot")

As Leonard pointed out, a newly allocated netns can happen
to reuse a freed 'struct net'.

While TCP TW timers were covered by my patches, other things were not:

1) Lookups in rx path (INET_MATCH() and INET6_MATCH()), as they look
  at 4-tuple plus the 'struct net' pointer.

2) /proc/net/tcp[6] and inet_diag, same reason.

3) hashinfo->bhash[], same reason.

Fixing all this seems risky, lets instead revert.

In the future, we might have a per netns tcp hash table, or
a per netns list of timewait sockets...

Fixes: 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Leonard Crestez <cdleonard@gmail.com>
---
 include/net/inet_timewait_sock.h |  3 +-
 net/dccp/ipv4.c                  |  6 ++++
 net/dccp/ipv6.c                  |  6 ++++
 net/ipv4/inet_timewait_sock.c    | 58 ++++++++++++++++++++++++++++----
 net/ipv4/tcp_ipv4.c              |  2 ++
 net/ipv6/tcp_ipv6.c              |  6 ++++
 6 files changed, 73 insertions(+), 8 deletions(-)

Comments

Leonard Crestez May 13, 2022, 9:40 a.m. UTC | #1
On 13.05.2022 00:14, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> This reverts commits:
> 
> 0dad4087a86a2cbe177404dc73f18ada26a2c390 ("tcp/dccp: get rid of inet_twsk_purge()")
> d507204d3c5cc57d9a8bdf0a477615bb59ea1611 ("tcp/dccp: add tw->tw_bslot")
> 
> As Leonard pointed out, a newly allocated netns can happen
> to reuse a freed 'struct net'.
> 
> While TCP TW timers were covered by my patches, other things were not:
> 
> 1) Lookups in rx path (INET_MATCH() and INET6_MATCH()), as they look
>    at 4-tuple plus the 'struct net' pointer.
> 
> 2) /proc/net/tcp[6] and inet_diag, same reason.
> 
> 3) hashinfo->bhash[], same reason.
> 
> Fixing all this seems risky, lets instead revert.
> 
> In the future, we might have a per netns tcp hash table, or
> a per netns list of timewait sockets...
> 
> Fixes: 0dad4087a86a ("tcp/dccp: get rid of inet_twsk_purge()")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Leonard Crestez <cdleonard@gmail.com>

Tested-by: Leonard Crestez <cdleonard@gmail.com>
patchwork-bot+netdevbpf@kernel.org May 13, 2022, 11:50 a.m. UTC | #2
Hello:

This patch was applied to netdev/net.git (master)
by David S. Miller <davem@davemloft.net>:

On Thu, 12 May 2022 14:14:56 -0700 you wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> This reverts commits:
> 
> 0dad4087a86a2cbe177404dc73f18ada26a2c390 ("tcp/dccp: get rid of inet_twsk_purge()")
> d507204d3c5cc57d9a8bdf0a477615bb59ea1611 ("tcp/dccp: add tw->tw_bslot")
> 
> [...]

Here is the summary with links:
  - [net] Revert "tcp/dccp: get rid of inet_twsk_purge()"
    https://git.kernel.org/netdev/net/c/04c494e68a13

You are awesome, thank you!
diff mbox series

Patch

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 463ae5d33eb09c40caeb4d039af268609b5e563b..5b47545f22d39eb2dd9725ac37bd7d7a9016a03c 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -71,7 +71,6 @@  struct inet_timewait_sock {
 				tw_tos		: 8;
 	u32			tw_txhash;
 	u32			tw_priority;
-	u32			tw_bslot; /* bind bucket slot */
 	struct timer_list	tw_timer;
 	struct inet_bind_bucket	*tw_tb;
 };
@@ -110,6 +109,8 @@  static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
+
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ae662567a6cb6a440c79a9805a2cd6d146ac5a29..0ea29270d7e53730d14ec43654be8f956f891552 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1030,9 +1030,15 @@  static void __net_exit dccp_v4_exit_net(struct net *net)
 	inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
+static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
+{
+	inet_twsk_purge(&dccp_hashinfo, AF_INET);
+}
+
 static struct pernet_operations dccp_v4_ops = {
 	.init	= dccp_v4_init_net,
 	.exit	= dccp_v4_exit_net,
+	.exit_batch = dccp_v4_exit_batch,
 	.id	= &dccp_v4_pernet_id,
 	.size   = sizeof(struct dccp_v4_pernet),
 };
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index eab3bd1ee9a0a0064c04ff97fd8363e60daa0079..fa663518fa0e465458b7486ad0cd0672425f08b0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1115,9 +1115,15 @@  static void __net_exit dccp_v6_exit_net(struct net *net)
 	inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
+static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
+{
+	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations dccp_v6_ops = {
 	.init   = dccp_v6_init_net,
 	.exit   = dccp_v6_exit_net,
+	.exit_batch = dccp_v6_exit_batch,
 	.id	= &dccp_v6_pernet_id,
 	.size   = sizeof(struct dccp_v6_pernet),
 };
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9e0bbd02656013e6e8be5765a7b86fc16e6bf831..0ec501845cb3bb51082f8091b4e0ebb32f83bf33 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,7 +52,8 @@  static void inet_twsk_kill(struct inet_timewait_sock *tw)
 	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[tw->tw_bslot];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+			hashinfo->bhash_size)];
 
 	spin_lock(&bhead->lock);
 	inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@  void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	/* Cache inet_bhashfn(), because 'struct net' might be no longer
-	 * available later in inet_twsk_kill().
-	 */
-	tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
-				    hashinfo->bhash_size);
-	bhead = &hashinfo->bhash[tw->tw_bslot];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@  void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
 	}
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+	struct inet_timewait_sock *tw;
+	struct sock *sk;
+	struct hlist_nulls_node *node;
+	unsigned int slot;
+
+	for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+		struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+		cond_resched();
+		rcu_read_lock();
+restart:
+		sk_nulls_for_each_rcu(sk, node, &head->chain) {
+			if (sk->sk_state != TCP_TIME_WAIT)
+				continue;
+			tw = inet_twsk(sk);
+			if ((tw->tw_family != family) ||
+				refcount_read(&twsk_net(tw)->ns.count))
+				continue;
+
+			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+				continue;
+
+			if (unlikely((tw->tw_family != family) ||
+				     refcount_read(&twsk_net(tw)->ns.count))) {
+				inet_twsk_put(tw);
+				goto restart;
+			}
+
+			rcu_read_unlock();
+			local_bh_disable();
+			inet_twsk_deschedule_put(tw);
+			local_bh_enable();
+			goto restart_rcu;
+		}
+		/* If the nulls value we got at the end of this lookup is
+		 * not the expected one, we must restart lookup.
+		 * We probably met an item that was moved to another chain.
+		 */
+		if (get_nulls_value(node) != slot)
+			goto restart;
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f9cec624068dfa1d218357d7e88c89459d7d54f4..457f5b5d5d4a95c06eca82db1dbe7822cb4d040c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3173,6 +3173,8 @@  static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
 	struct net *net;
 
+	inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
 	list_for_each_entry(net, net_exit_list, exit_list)
 		tcp_fastopen_ctx_destroy(net);
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 13678d3908fac9990e5b0c0df87fa4cca685baaf..faaddaf43c90b96e7a2bc9fbad7941ae5ada1b3c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2207,9 +2207,15 @@  static void __net_exit tcpv6_net_exit(struct net *net)
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations tcpv6_net_ops = {
 	.init	    = tcpv6_net_init,
 	.exit	    = tcpv6_net_exit,
+	.exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)