Message ID | 20230116073813.24097-1-kerneljasonxing@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [v4,net] tcp: avoid the lookup process failing to get sk in ehash table | expand |
On Mon, Jan 16, 2023 at 8:38 AM Jason Xing <kerneljasonxing@gmail.com> wrote: > > From: Jason Xing <kernelxing@tencent.com> > > While one cpu is working on looking up the right socket from ehash > table, another cpu is done deleting the request socket and is about > to add (or is adding) the big socket from the table. It means that > we could miss both of them, even though it has little chance. > > > Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") > Suggested-by: Eric Dumazet <edumazet@google.com> > Signed-off-by: Jason Xing <kernelxing@tencent.com> > Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ > --- > v4: > 1) adjust the code style and make it easier to read. > > v3: > 1) get rid of else-if statement. > > v2: > 1) adding the sk node into the tail of list to prevent the race. > 2) fix the race condition when handling time-wait socket hashdance. > --- > net/ipv4/inet_hashtables.c | 18 ++++++++++++++++-- > net/ipv4/inet_timewait_sock.c | 6 +++--- > 2 files changed, 19 insertions(+), 5 deletions(-) > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > index 24a38b56fab9..c64eec874b31 100644 > --- a/net/ipv4/inet_hashtables.c > +++ b/net/ipv4/inet_hashtables.c > @@ -650,8 +650,21 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > spin_lock(lock); > if (osk) { > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > - ret = sk_nulls_del_node_init_rcu(osk); > - } else if (found_dup_sk) { > + if (sk_hashed(osk)) { > + /* Before deleting the node, we insert a new one to make > + * sure that the look-up-sk process would not miss either > + * of them and that at least one node would exist in ehash > + * table all the time. Otherwise there's a tiny chance > + * that lookup process could find nothing in ehash table. 
> + */ > + __sk_nulls_add_node_tail_rcu(sk, list); > + sk_nulls_del_node_init_rcu(osk); > + } else { > + ret = false; Well, you added another 'else' statement... What about the following ? diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24a38b56fab9e9d7d893e23b30d26e275359ec70..1bcf5ce8dd1317b2144bcb47a2ad238532b9accf 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -650,8 +650,14 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_nulls_del_node_init_rcu(osk); - } else if (found_dup_sk) { + ret = sk_hashed(osk); + if (ret) { + __sk_nulls_add_node_tail_rcu(sk, list); + sk_nulls_del_node_init_rcu(osk); + } + goto unlock; + } + if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -659,7 +665,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); - +unlock: spin_unlock(lock); return ret;
On Mon, Jan 16, 2023 at 5:54 PM Eric Dumazet <edumazet@google.com> wrote: > > On Mon, Jan 16, 2023 at 8:38 AM Jason Xing <kerneljasonxing@gmail.com> wrote: > > > > From: Jason Xing <kernelxing@tencent.com> > > > > While one cpu is working on looking up the right socket from ehash > > table, another cpu is done deleting the request socket and is about > > to add (or is adding) the big socket from the table. It means that > > we could miss both of them, even though it has little chance. > > > > > > Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") > > Suggested-by: Eric Dumazet <edumazet@google.com> > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ > > --- > > v4: > > 1) adjust the code style and make it easier to read. > > > > v3: > > 1) get rid of else-if statement. > > > > v2: > > 1) adding the sk node into the tail of list to prevent the race. > > 2) fix the race condition when handling time-wait socket hashdance. > > --- > > net/ipv4/inet_hashtables.c | 18 ++++++++++++++++-- > > net/ipv4/inet_timewait_sock.c | 6 +++--- > > 2 files changed, 19 insertions(+), 5 deletions(-) > > > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > > index 24a38b56fab9..c64eec874b31 100644 > > --- a/net/ipv4/inet_hashtables.c > > +++ b/net/ipv4/inet_hashtables.c > > @@ -650,8 +650,21 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > > spin_lock(lock); > > if (osk) { > > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > > - ret = sk_nulls_del_node_init_rcu(osk); > > - } else if (found_dup_sk) { > > + if (sk_hashed(osk)) { > > + /* Before deleting the node, we insert a new one to make > > + * sure that the look-up-sk process would not miss either > > + * of them and that at least one node would exist in ehash > > + * table all the time. 
Otherwise there's a tiny chance > > + * that lookup process could find nothing in ehash table. > > + */ > > + __sk_nulls_add_node_tail_rcu(sk, list); > > + sk_nulls_del_node_init_rcu(osk); > > + } else { > > + ret = false; > > > Well, you added another 'else' statement... > Yeah, I want to make the code look more concise and easy to read. I always felt the previous series of commits was not human-readable, even though it worked. > What about the following ? > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > index 24a38b56fab9e9d7d893e23b30d26e275359ec70..1bcf5ce8dd1317b2144bcb47a2ad238532b9accf > 100644 > --- a/net/ipv4/inet_hashtables.c > +++ b/net/ipv4/inet_hashtables.c > @@ -650,8 +650,14 @@ bool inet_ehash_insert(struct sock *sk, struct > sock *osk, bool *found_dup_sk) > spin_lock(lock); > if (osk) { > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > - ret = sk_nulls_del_node_init_rcu(osk); > - } else if (found_dup_sk) { > + ret = sk_hashed(osk); > + if (ret) { > + __sk_nulls_add_node_tail_rcu(sk, list); > + sk_nulls_del_node_init_rcu(osk); > + } Ah, I prefer this one :) Thanks, Jason > + goto unlock; > + } > + if (found_dup_sk) { > *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); > if (*found_dup_sk) > ret = false; > @@ -659,7 +665,7 @@ bool inet_ehash_insert(struct sock *sk, struct > sock *osk, bool *found_dup_sk) > > if (ret) > __sk_nulls_add_node_rcu(sk, list); > - > +unlock: > spin_unlock(lock); > > return ret;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24a38b56fab9..c64eec874b31 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -650,8 +650,21 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_nulls_del_node_init_rcu(osk); - } else if (found_dup_sk) { + if (sk_hashed(osk)) { + /* Before deleting the node, we insert a new one to make + * sure that the look-up-sk process would not miss either + * of them and that at least one node would exist in ehash + * table all the time. Otherwise there's a tiny chance + * that lookup process could find nothing in ehash table. + */ + __sk_nulls_add_node_tail_rcu(sk, list); + sk_nulls_del_node_init_rcu(osk); + } else { + ret = false; + } + goto unlock; + } + if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -660,6 +673,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); +unlock: spin_unlock(lock); return ret; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1d77d992e6e7..6d681ef52bb2 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -91,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, +static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&tw->tw_node, list); + hlist_nulls_add_tail_rcu(&tw->tw_node, list); } static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -147,7 +147,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - inet_twsk_add_node_rcu(tw, &ehead->chain); + inet_twsk_add_node_tail_rcu(tw, &ehead->chain); /* Step 3: 
Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk))