Message ID | 20240127020833.487907-5-kent.overstreet@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | lockdep cmp fn conversions | expand |
From: Kent Overstreet <kent.overstreet@linux.dev> Date: Fri, 26 Jan 2024 21:08:31 -0500 > Kill > - unix_state_lock_nested > - _nested usage for net->unx.table.locks[]. > > replace both with lock_set_cmp_fn_ptr_order(&u->lock). > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may > turn up a real issue. Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested(). The lock order in sk_diag_dump_icons() is listening socket -> child socket in the listener's queue , and the inverse order never happens. ptr comparison does not make sense in this case, and lockdep will complain about false positive. > > Cc: netdev@vger.kernel.org > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > --- > include/net/af_unix.h | 3 --- > net/unix/af_unix.c | 20 ++++++++------------ > net/unix/diag.c | 2 +- > 3 files changed, 9 insertions(+), 16 deletions(-) > > diff --git a/include/net/af_unix.h b/include/net/af_unix.h > index 49c4640027d8..4eff0a089640 100644 > --- a/include/net/af_unix.h > +++ b/include/net/af_unix.h > @@ -48,9 +48,6 @@ struct scm_stat { > > #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) > #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) > -#define unix_state_lock_nested(s) \ > - spin_lock_nested(&unix_sk(s)->lock, \ > - SINGLE_DEPTH_NESTING) > > /* The AF_UNIX socket */ > struct unix_sock { > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index d013de3c5490..1a0d273799c1 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net, > swap(hash1, hash2); > > spin_lock(&net->unx.table.locks[hash1]); > - spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); > + spin_lock(&net->unx.table.locks[hash2]); > } > > static void unix_table_double_unlock(struct net *net, > @@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, > u->path.dentry = NULL; > u->path.mnt = NULL; > spin_lock_init(&u->lock); > + lock_set_cmp_fn_ptr_order(&u->lock); > atomic_long_set(&u->inflight, 0); > INIT_LIST_HEAD(&u->link); > mutex_init(&u->iolock); /* single task reading lock */ > @@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) > > static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) > { > - if (unlikely(sk1 == sk2) || !sk2) { > - unix_state_lock(sk1); > - return; > - } > - if (sk1 < sk2) { > + if (sk1 > sk2) > + swap(sk1, sk2); > + if (sk1 && sk1 != sk2) > unix_state_lock(sk1); > - unix_state_lock_nested(sk2); > - } else { > - unix_state_lock(sk2); > - unix_state_lock_nested(sk1); > - } > + unix_state_lock(sk2); > } > > static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) > @@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, > goto out_unlock; > } > > - unix_state_lock_nested(sk); > + unix_state_lock(sk); > > if (sk->sk_state != st) { > unix_state_unlock(sk); > @@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net) > > for (i = 0; i < UNIX_HASH_SIZE; i++) { > spin_lock_init(&net->unx.table.locks[i]); > + lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]); > INIT_HLIST_HEAD(&net->unx.table.buckets[i]); > } > > diff --git a/net/unix/diag.c b/net/unix/diag.c > index bec09a3a1d44..8ab5e2217e4c 100644 > --- a/net/unix/diag.c > +++ b/net/unix/diag.c > @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) > * queue lock. With the other's queue locked it's > * OK to lock the state. > */ > - unix_state_lock_nested(req); > + unix_state_lock(req); > peer = unix_sk(req)->peer; > buf[i++] = (peer ? sock_i_ino(peer) : 0); > unix_state_unlock(req); > -- > 2.43.0
On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote: > From: Kent Overstreet <kent.overstreet@linux.dev> > Date: Fri, 26 Jan 2024 21:08:31 -0500 > > Kill > > - unix_state_lock_nested > > - _nested usage for net->unx.table.locks[]. > > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock). > > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may > > turn up a real issue. > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested(). > > The lock order in sk_diag_dump_icons() is > > listening socket -> child socket in the listener's queue > > , and the inverse order never happens. ptr comparison does not make > sense in this case, and lockdep will complain about false positive. Is that a real lock ordering? Is this parent -> child relationship well defined? If it is, we should be able to write a lock_cmp_fn for it, as long as it's not some handwavy "this will never happen but _nested won't check for it" like I saw elsewhere in the net code... :)
From: Kent Overstreet <kent.overstreet@linux.dev> Date: Sun, 28 Jan 2024 14:38:02 -0500 > On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote: > > From: Kent Overstreet <kent.overstreet@linux.dev> > > Date: Fri, 26 Jan 2024 21:08:31 -0500 > > > Kill > > > - unix_state_lock_nested > > > - _nested usage for net->unx.table.locks[]. > > > > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock). > > > > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may > > > turn up a real issue. > > > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested(). > > > > The lock order in sk_diag_dump_icons() is > > > > listening socket -> child socket in the listener's queue > > > > , and the inverse order never happens. ptr comparison does not make > > sense in this case, and lockdep will complain about false positive. > > Is that a real lock ordering? Is this parent -> child relationship well > defined? > > If it is, we should be able to write a lock_cmp_fn for it, as long as > it's not some handwavy "this will never happen but _nested won't check > for it" like I saw elsewhere in the net code... :) The problem would be there's no handy way to detect the relationship except for iterating the queue again. ---8<--- static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, const struct lockdep_map *_b) { const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map); const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map); if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) { /* check if b is a's cihld */ } /* otherwise, ptr comparison here. */ } ---8<--- This can be resolved by a patch like this, which is in my local tree for another series. So, after posting the series, I can revisit this and write lock_cmp_fn for u->lock. ---8<--- commit 12d39766b06068fda5987f4e7b4827e8c3273137 Author: Kuniyuki Iwashima <kuniyu@amazon.com> Date: Thu Jan 11 22:36:58 2024 +0000 af_unix: Save listener for embryo socket. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 9ea04653c7c9..d0c0d81bcb1d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -82,6 +82,7 @@ struct scm_stat { struct unix_sock { /* WARNING: sk has to be the first member */ struct sock sk; +#define usk_listener sk.__sk_common.skc_unix_listener struct unix_address *addr; struct path path; struct mutex iolock, bindlock; diff --git a/include/net/sock.h b/include/net/sock.h index a9d99a9c583f..3df3d068345a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -142,6 +142,8 @@ typedef __u64 __bitwise __addrpair; * [union with @skc_incoming_cpu] * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number * [union with @skc_incoming_cpu] + * @skc_unix_listener: connection request listener socket for AF_UNIX + * [union with @skc_rxhash] * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header @@ -227,6 +229,7 @@ struct sock_common { u32 skc_rxhash; u32 skc_window_clamp; u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */ + struct sock *skc_unix_listener; /* struct unix_sock */ }; /* public: */ }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5f9871555ec6..4a41bb727c32 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -991,6 +991,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); + u->usk_listener = NULL; u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; @@ -1612,6 +1613,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); + newu->usk_listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); @@ -1707,8 +1709,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct sock *sk = sock->sk; - struct sock *tsk; struct sk_buff *skb; + struct sock *tsk; int err; err = -EOPNOTSUPP; @@ -1733,6 +1735,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, } tsk = skb->sk; + unix_sk(tsk)->usk_listener = NULL; skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); ---8<---
On Sun, Jan 28, 2024 at 12:56:32PM -0800, Kuniyuki Iwashima wrote: > From: Kent Overstreet <kent.overstreet@linux.dev> > Date: Sun, 28 Jan 2024 14:38:02 -0500 > > On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote: > > > From: Kent Overstreet <kent.overstreet@linux.dev> > > > Date: Fri, 26 Jan 2024 21:08:31 -0500 > > > > Kill > > > > - unix_state_lock_nested > > > > - _nested usage for net->unx.table.locks[]. > > > > > > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock). > > > > > > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may > > > > turn up a real issue. > > > > > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested(). > > > > > > The lock order in sk_diag_dump_icons() is > > > > > > listening socket -> child socket in the listener's queue > > > > > > , and the inverse order never happens. ptr comparison does not make > > > sense in this case, and lockdep will complain about false positive. > > > > Is that a real lock ordering? Is this parent -> child relationship well > > defined? > > > > If it is, we should be able to write a lock_cmp_fn for it, as long as > > it's not some handwavy "this will never happen but _nested won't check > > for it" like I saw elsewhere in the net code... :) > > The problem would be there's no handy way to detect the relationship > except for iterating the queue again. > > ---8<--- > static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, > const struct lockdep_map *_b) > { > const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map); > const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map); > > if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) { > /* check if b is a's cihld */ > } > > /* otherwise, ptr comparison here. */ > } > ---8<--- > > > This can be resolved by a patch like this, which is in my local tree > for another series. > > So, after posting the series, I can revisit this and write lock_cmp_fn > for u->lock. Sounds good! Please CC me when you do.
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8..4eff0a089640 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -48,9 +48,6 @@ struct scm_stat { #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) /* The AF_UNIX socket */ struct unix_sock { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d013de3c5490..1a0d273799c1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net, swap(hash1, hash2); spin_lock(&net->unx.table.locks[hash1]); - spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); + spin_lock(&net->unx.table.locks[hash2]); } static void unix_table_double_unlock(struct net *net, @@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); + lock_set_cmp_fn_ptr_order(&u->lock); atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ @@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) { - if (unlikely(sk1 == sk2) || !sk2) { - unix_state_lock(sk1); - return; - } - if (sk1 < sk2) { + if (sk1 > sk2) + swap(sk1, sk2); + if (sk1 && sk1 != sk2) unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + unix_state_lock(sk2); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock(sk); if (sk->sk_state != st) { unix_state_unlock(sk); @@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net) for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock_init(&net->unx.table.locks[i]); + lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]); INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44..8ab5e2217e4c 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock(req); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req);
Kill - unix_state_lock_nested - _nested usage for net->unx.table.locks[]. replace both with lock_set_cmp_fn_ptr_order(&u->lock). The lock ordering in sk_diag_dump_icons() looks suspicious; this may turn up a real issue. Cc: netdev@vger.kernel.org Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- include/net/af_unix.h | 3 --- net/unix/af_unix.c | 20 ++++++++------------ net/unix/diag.c | 2 +- 3 files changed, 9 insertions(+), 16 deletions(-)