diff mbox series

[v1,net,2/5] tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.

Message ID 20230911165106.39384-3-kuniyu@amazon.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tcp: Fix bind() regression for v4-mapped-v6 address | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kuniyuki Iwashima Sept. 11, 2023, 4:51 p.m. UTC
Since bhash2 was introduced, the example below does not work as expected.
These two bind() should conflict, but the 2nd bind() now succeeds.

  from socket import *

  s1 = socket(AF_INET6, SOCK_STREAM)
  s1.bind(('::ffff:127.0.0.1', 0))

  s2 = socket(AF_INET, SOCK_STREAM)
  s2.bind(('127.0.0.1', s1.getsockname()[1]))

During the 2nd bind() in inet_csk_get_port(), inet_bind2_bucket_find()
fails to find the 1st socket's tb2, so inet_bind2_bucket_create() allocates
a new tb2 for the 2nd socket.  Then, we call inet_csk_bind_conflict() that
checks conflicts in the new tb2 by inet_bhash2_conflict().  However, the
new tb2 does not include the 1st socket, thus the bind() finally succeeds.

In this case, inet_bind2_bucket_match() must check if AF_INET6 tb2 has
the conflicting v4-mapped-v6 address so that inet_bind2_bucket_find()
returns the 1st socket's tb2.

Note that if we bind two sockets to 127.0.0.1 and then ::FFFF:127.0.0.1,
the 2nd bind() fails properly for the same reason mentioned in the previous
commit.

Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/ipv4/inet_hashtables.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Eric Dumazet Sept. 11, 2023, 5:51 p.m. UTC | #1
On Mon, Sep 11, 2023 at 6:52 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Since bhash2 was introduced, the example below does not work as expected.
> These two bind() should conflict, but the 2nd bind() now succeeds.
>
>   from socket import *
>
>   s1 = socket(AF_INET6, SOCK_STREAM)
>   s1.bind(('::ffff:127.0.0.1', 0))
>
>   s2 = socket(AF_INET, SOCK_STREAM)
>   s2.bind(('127.0.0.1', s1.getsockname()[1]))
>
> During the 2nd bind() in inet_csk_get_port(), inet_bind2_bucket_find()
> fails to find the 1st socket's tb2, so inet_bind2_bucket_create() allocates
> a new tb2 for the 2nd socket.  Then, we call inet_csk_bind_conflict() that
> checks conflicts in the new tb2 by inet_bhash2_conflict().  However, the
> new tb2 does not include the 1st socket, thus the bind() finally succeeds.
>
> In this case, inet_bind2_bucket_match() must check if AF_INET6 tb2 has
> the conflicting v4-mapped-v6 address so that inet_bind2_bucket_find()
> returns the 1st socket's tb2.
>
> Note that if we bind two sockets to 127.0.0.1 and then ::FFFF:127.0.0.1,
> the 2nd bind() fails properly for the same reason mentioned in the previous
> commit.
>
> Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  net/ipv4/inet_hashtables.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index 0a9b20eb81c4..54505100c914 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -816,8 +816,15 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
>                                     int l3mdev, const struct sock *sk)
>  {
>  #if IS_ENABLED(CONFIG_IPV6)
> -       if (sk->sk_family != tb->family)
> +       if (sk->sk_family != tb->family) {
> +               if (sk->sk_family == AF_INET)
> +                       return net_eq(ib2_net(tb), net) && tb->port == port &&
> +                               tb->l3mdev == l3mdev &&
> +                               ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
> +                               tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
> +
>                 return false;
> +       }
>
>         if (sk->sk_family == AF_INET6)
>                 return net_eq(ib2_net(tb), net) && tb->port == port &&
> --

Could we first factorize all these "net_eq(ib2_net(tb), net) &&
tb->port == port" checks ?

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7876b7d703cb5647086c45ca547c4caadc00c091..6240c802ed772272028e6e65bf90f345dd2d1619 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -832,24 +832,24 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
 bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
                                       unsigned short port, int l3mdev, const struct sock *sk)
 {
+       if (!net_eq(ib2_net(tb), net) || tb->port != port)
+               return false;
+
 #if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family != tb->family) {
                if (sk->sk_family == AF_INET)
-                       return net_eq(ib2_net(tb), net) && tb->port == port &&
-                               tb->l3mdev == l3mdev &&
+                       return  tb->l3mdev == l3mdev &&
                                ipv6_addr_any(&tb->v6_rcv_saddr);

                return false;
        }

        if (sk->sk_family == AF_INET6)
-               return net_eq(ib2_net(tb), net) && tb->port == port &&
-                       tb->l3mdev == l3mdev &&
+               return  tb->l3mdev == l3mdev &&
                        ipv6_addr_any(&tb->v6_rcv_saddr);
        else
 #endif
-               return net_eq(ib2_net(tb), net) && tb->port == port &&
-                       tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+               return tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
 }

 /* The socket's bhash2 hashbucket spinlock must be held when this is called */
Kuniyuki Iwashima Sept. 11, 2023, 6:05 p.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 11 Sep 2023 19:51:44 +0200
> On Mon, Sep 11, 2023 at 6:52 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > Since bhash2 was introduced, the example below does not work as expected.
> > These two bind() should conflict, but the 2nd bind() now succeeds.
> >
> >   from socket import *
> >
> >   s1 = socket(AF_INET6, SOCK_STREAM)
> >   s1.bind(('::ffff:127.0.0.1', 0))
> >
> >   s2 = socket(AF_INET, SOCK_STREAM)
> >   s2.bind(('127.0.0.1', s1.getsockname()[1]))
> >
> > During the 2nd bind() in inet_csk_get_port(), inet_bind2_bucket_find()
> > fails to find the 1st socket's tb2, so inet_bind2_bucket_create() allocates
> > a new tb2 for the 2nd socket.  Then, we call inet_csk_bind_conflict() that
> > checks conflicts in the new tb2 by inet_bhash2_conflict().  However, the
> > new tb2 does not include the 1st socket, thus the bind() finally succeeds.
> >
> > In this case, inet_bind2_bucket_match() must check if AF_INET6 tb2 has
> > the conflicting v4-mapped-v6 address so that inet_bind2_bucket_find()
> > returns the 1st socket's tb2.
> >
> > Note that if we bind two sockets to 127.0.0.1 and then ::FFFF:127.0.0.1,
> > the 2nd bind() fails properly for the same reason mentioned in the previous
> > commit.
> >
> > Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> >  net/ipv4/inet_hashtables.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> > index 0a9b20eb81c4..54505100c914 100644
> > --- a/net/ipv4/inet_hashtables.c
> > +++ b/net/ipv4/inet_hashtables.c
> > @@ -816,8 +816,15 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
> >                                     int l3mdev, const struct sock *sk)
> >  {
> >  #if IS_ENABLED(CONFIG_IPV6)
> > -       if (sk->sk_family != tb->family)
> > +       if (sk->sk_family != tb->family) {
> > +               if (sk->sk_family == AF_INET)
> > +                       return net_eq(ib2_net(tb), net) && tb->port == port &&
> > +                               tb->l3mdev == l3mdev &&
> > +                               ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
> > +                               tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
> > +
> >                 return false;
> > +       }
> >
> >         if (sk->sk_family == AF_INET6)
> >                 return net_eq(ib2_net(tb), net) && tb->port == port &&
> > --
> 
> Could we first factorize all these "net_eq(ib2_net(tb), net) &&
> tb->port == port" checks ?

That's much cleaner :)
I'll add a prep patch first in v2.

Thanks!

> 
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index 7876b7d703cb5647086c45ca547c4caadc00c091..6240c802ed772272028e6e65bf90f345dd2d1619
> 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -832,24 +832,24 @@ static bool inet_bind2_bucket_match(const struct
> inet_bind2_bucket *tb,
>  bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket
> *tb, const struct net *net,
>                                       unsigned short port, int l3mdev,
> const struct sock *sk)
>  {
> +       if (!net_eq(ib2_net(tb), net) || tb->port != port)
> +               return false;
> +
>  #if IS_ENABLED(CONFIG_IPV6)
>         if (sk->sk_family != tb->family) {
>                 if (sk->sk_family == AF_INET)
> -                       return net_eq(ib2_net(tb), net) && tb->port == port &&
> -                               tb->l3mdev == l3mdev &&
> +                       return  tb->l3mdev == l3mdev &&
>                                 ipv6_addr_any(&tb->v6_rcv_saddr);
> 
>                 return false;
>         }
> 
>         if (sk->sk_family == AF_INET6)
> -               return net_eq(ib2_net(tb), net) && tb->port == port &&
> -                       tb->l3mdev == l3mdev &&
> +               return  tb->l3mdev == l3mdev &&
>                         ipv6_addr_any(&tb->v6_rcv_saddr);
>         else
>  #endif
> -               return net_eq(ib2_net(tb), net) && tb->port == port &&
> -                       tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
> +               return tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
>  }
> 
>  /* The socket's bhash2 hashbucket spinlock must be held when this is called */
diff mbox series

Patch

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 0a9b20eb81c4..54505100c914 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -816,8 +816,15 @@  static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
 				    int l3mdev, const struct sock *sk)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family != tb->family)
+	if (sk->sk_family != tb->family) {
+		if (sk->sk_family == AF_INET)
+			return net_eq(ib2_net(tb), net) && tb->port == port &&
+				tb->l3mdev == l3mdev &&
+				ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+				tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+
 		return false;
+	}
 
 	if (sk->sk_family == AF_INET6)
 		return net_eq(ib2_net(tb), net) && tb->port == port &&