Message ID | 20230114132705.78400-1-kerneljasonxing@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [v2,net] tcp: avoid the lookup process failing to get sk in ehash table | expand |
On Sat, Jan 14, 2023 at 2:27 PM Jason Xing <kerneljasonxing@gmail.com> wrote: > > From: Jason Xing <kernelxing@tencent.com> > > > Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") > Suggested-by: Eric Dumazet <edumazet@google.com> > Signed-off-by: Jason Xing <kernelxing@tencent.com> > Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ > --- > v2: > 1) adding the sk node into the tail of list to prevent the race. > 2) fix the race condition when handling time-wait socket hashdance. > --- > net/ipv4/inet_hashtables.c | 10 ++++++++++ > net/ipv4/inet_timewait_sock.c | 6 +++--- > 2 files changed, 13 insertions(+), 3 deletions(-) > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > index 24a38b56fab9..b0b54ad55507 100644 > --- a/net/ipv4/inet_hashtables.c > +++ b/net/ipv4/inet_hashtables.c > @@ -650,7 +650,16 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > spin_lock(lock); > if (osk) { > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > + if (sk_hashed(osk)) nit: this should be: if (sk_hashed(osk)) { [1] /* multi-line .... * .... comment. */ ret = sk_nulls_del_node_init_rcu(osk); goto unlock; } if (found_dup_sk) { [2] 1) parentheses needed in [1] 2) No else if in [2], since you added a "goto unlock;" > + /* Before deleting the node, we insert a new one to make > + * sure that the look-up-sk process would not miss either > + * of them and that at least one node would exist in ehash > + * table all the time. Otherwise there's a tiny chance > + * that lookup process could find nothing in ehash table. 
> + */ > + __sk_nulls_add_node_tail_rcu(sk, list); > ret = sk_nulls_del_node_init_rcu(osk); > + goto unlock; > } else if (found_dup_sk) { > *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); > if (*found_dup_sk) > @@ -660,6 +669,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > if (ret) > __sk_nulls_add_node_rcu(sk, list); > > +unlock: > spin_unlock(lock); Thanks.
On Mon, Jan 16, 2023 at 12:12 AM Eric Dumazet <edumazet@google.com> wrote: > > On Sat, Jan 14, 2023 at 2:27 PM Jason Xing <kerneljasonxing@gmail.com> wrote: > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") > > Suggested-by: Eric Dumazet <edumazet@google.com> > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ > > --- > > v2: > > 1) adding the sk node into the tail of list to prevent the race. > > 2) fix the race condition when handling time-wait socket hashdance. > > --- > > net/ipv4/inet_hashtables.c | 10 ++++++++++ > > net/ipv4/inet_timewait_sock.c | 6 +++--- > > 2 files changed, 13 insertions(+), 3 deletions(-) > > > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > > index 24a38b56fab9..b0b54ad55507 100644 > > --- a/net/ipv4/inet_hashtables.c > > +++ b/net/ipv4/inet_hashtables.c > > @@ -650,7 +650,16 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > > spin_lock(lock); > > if (osk) { > > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > > + if (sk_hashed(osk)) > > > nit: this should be: > > if (sk_hashed(osk)) { [1] > /* multi-line .... > * .... comment. > */ > ret = sk_nulls_del_node_init_rcu(osk); > goto unlock; > } > if (found_dup_sk) { [2] > > 1) parentheses needed in [1] > 2) No else if in [2], since you added a "goto unlock;" > I'll do that. It looks much better. Thanks, Jason > > + /* Before deleting the node, we insert a new one to make > > + * sure that the look-up-sk process would not miss either > > + * of them and that at least one node would exist in ehash > > + * table all the time. Otherwise there's a tiny chance > > + * that lookup process could find nothing in ehash table. 
> > + */ > > + __sk_nulls_add_node_tail_rcu(sk, list); > > ret = sk_nulls_del_node_init_rcu(osk); > > + goto unlock; > > } else if (found_dup_sk) { > > *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); > > if (*found_dup_sk) > > @@ -660,6 +669,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > > if (ret) > > __sk_nulls_add_node_rcu(sk, list); > > > > +unlock: > > spin_unlock(lock); > > Thanks.
On Mon, Jan 16, 2023 at 8:36 AM Jason Xing <kerneljasonxing@gmail.com> wrote: > > On Mon, Jan 16, 2023 at 12:12 AM Eric Dumazet <edumazet@google.com> wrote: > > > > On Sat, Jan 14, 2023 at 2:27 PM Jason Xing <kerneljasonxing@gmail.com> wrote: > > > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > > > > Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") > > > Suggested-by: Eric Dumazet <edumazet@google.com> > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ > > > --- > > > v2: > > > 1) adding the sk node into the tail of list to prevent the race. > > > 2) fix the race condition when handling time-wait socket hashdance. > > > --- > > > net/ipv4/inet_hashtables.c | 10 ++++++++++ > > > net/ipv4/inet_timewait_sock.c | 6 +++--- > > > 2 files changed, 13 insertions(+), 3 deletions(-) > > > > > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > > > index 24a38b56fab9..b0b54ad55507 100644 > > > --- a/net/ipv4/inet_hashtables.c > > > +++ b/net/ipv4/inet_hashtables.c > > > @@ -650,7 +650,16 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > > > spin_lock(lock); > > > if (osk) { > > > WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); > > > + if (sk_hashed(osk)) > > > > > > nit: this should be: > > > > if (sk_hashed(osk)) { [1] > > /* multi-line .... > > * .... comment. > > */ > > ret = sk_nulls_del_node_init_rcu(osk); > > goto unlock; > > } Well, after digging into this part, I found the following. Once we enter the 'if (osk) {' branch, we should always skip the next if-statement, 'if (found_dup_sk) {', and return a value that depends on whether the osk is hashed. However, the code above would leave the variable @ret set to true if sk_hashed(osk) returned false; execution would then not go to unlock, but would instead add the node to the list and finally return true, which is unexpected. 
> > if (found_dup_sk) { [2] > > > > 1) parentheses needed in [1] > > 2) No else if in [2], since you added a "goto unlock;" I think this modification is fine and makes the code clearer. Thanks, Jason > > > > I'll do that. It looks much better. > > Thanks, > Jason > > > > + /* Before deleting the node, we insert a new one to make > > > + * sure that the look-up-sk process would not miss either > > > + * of them and that at least one node would exist in ehash > > > + * table all the time. Otherwise there's a tiny chance > > > + * that lookup process could find nothing in ehash table. > > > + */ > > > + __sk_nulls_add_node_tail_rcu(sk, list); > > > ret = sk_nulls_del_node_init_rcu(osk); > > > + goto unlock; > > > } else if (found_dup_sk) { > > > *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); > > > if (*found_dup_sk) > > > @@ -660,6 +669,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) > > > if (ret) > > > __sk_nulls_add_node_rcu(sk, list); > > > > > > +unlock: > > > spin_unlock(lock); > > > > Thanks.
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24a38b56fab9..b0b54ad55507 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -650,7 +650,16 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); + if (sk_hashed(osk)) + /* Before deleting the node, we insert a new one to make + * sure that the look-up-sk process would not miss either + * of them and that at least one node would exist in ehash + * table all the time. Otherwise there's a tiny chance + * that lookup process could find nothing in ehash table. + */ + __sk_nulls_add_node_tail_rcu(sk, list); ret = sk_nulls_del_node_init_rcu(osk); + goto unlock; } else if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) @@ -660,6 +669,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); +unlock: spin_unlock(lock); return ret; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1d77d992e6e7..6d681ef52bb2 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -91,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, +static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&tw->tw_node, list); + hlist_nulls_add_tail_rcu(&tw->tw_node, list); } static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -147,7 +147,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - inet_twsk_add_node_rcu(tw, &ehead->chain); + inet_twsk_add_node_tail_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk))