Message ID | 20240729192601.97316-1-kuniyu@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Series | [v1,net] sctp: Fix null-ptr-deref in reuseport_add_sock(). |
Hm. Note both 'reuseport_add_sock()' and 'reuseport_detach_sock()' use the
global 'reuseport_lock' internally. So what about using a read lock to
protect the 'sctp_for_each_hentry()' loop and upgrading to the write lock
only if the hash bucket actually needs to be updated? E.g.:

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 17fcaa9b0df9..4fbff388b1b4 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -735,6 +735,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 	struct sock *sk = ep->base.sk;
 	struct net *net = sock_net(sk);
 	struct sctp_hashbucket *head;
+	int err = 0;
 
 	ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
 	head = &sctp_ep_hashtable[ep->hashent];
@@ -743,11 +744,14 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 		bool any = sctp_is_ep_boundall(sk);
 		struct sctp_endpoint *ep2;
 		struct list_head *list;
-		int cnt = 0, err = 1;
+		int cnt = 0;
+
+		err = 1;
 
 		list_for_each(list, &ep->base.bind_addr.address_list)
 			cnt++;
 
+		read_lock(&head->lock);
 		sctp_for_each_hentry(ep2, &head->chain) {
 			struct sock *sk2 = ep2->base.sk;
 
@@ -760,25 +764,30 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 					       sctp_sk(sk), cnt);
 			if (!err) {
 				err = reuseport_add_sock(sk, sk2, any);
-				if (err)
-					return err;
+				if (err) {
+					read_unlock(&head->lock);
+					goto out;
+				}
 				break;
 			} else if (err < 0) {
-				return err;
+				read_unlock(&head->lock);
+				goto out;
 			}
 		}
+		read_unlock(&head->lock);
 
 		if (err) {
 			err = reuseport_alloc(sk, any);
 			if (err)
-				return err;
+				goto out;
 		}
 	}
 
 	write_lock(&head->lock);
 	hlist_add_head(&ep->node, &head->chain);
 	write_unlock(&head->lock);
-	return 0;
+out:
+	return err;
 }
 
 /* Add an endpoint to the hash. Local BH-safe. */

Dmitry
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Tue, 30 Jul 2024 16:46:05 +0300
> Hm. Note both 'reuseport_add_sock()' and 'reuseport_detach_sock()' use the
> global 'reuseport_lock' internally. So what about using a read lock to
> protect the 'sctp_for_each_hentry()' loop and upgrading to the write lock
> only if the hash bucket actually needs to be updated? E.g.:
> 
> diff --git a/net/sctp/input.c b/net/sctp/input.c
> index 17fcaa9b0df9..4fbff388b1b4 100644
> --- a/net/sctp/input.c
> +++ b/net/sctp/input.c
> @@ -735,6 +735,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
>  	struct sock *sk = ep->base.sk;
>  	struct net *net = sock_net(sk);
>  	struct sctp_hashbucket *head;
> +	int err = 0;
>  
>  	ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
>  	head = &sctp_ep_hashtable[ep->hashent];
> @@ -743,11 +744,14 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
>  		bool any = sctp_is_ep_boundall(sk);
>  		struct sctp_endpoint *ep2;
>  		struct list_head *list;
> -		int cnt = 0, err = 1;
> +		int cnt = 0;
> +
> +		err = 1;
>  
>  		list_for_each(list, &ep->base.bind_addr.address_list)
>  			cnt++;
>  
> +		read_lock(&head->lock);
>  		sctp_for_each_hentry(ep2, &head->chain) {
>  			struct sock *sk2 = ep2->base.sk;
>  
> @@ -760,25 +764,30 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
>  					       sctp_sk(sk), cnt);
>  			if (!err) {
>  				err = reuseport_add_sock(sk, sk2, any);
> -				if (err)
> -					return err;
> +				if (err) {
> +					read_unlock(&head->lock);
> +					goto out;
> +				}
>  				break;
>  			} else if (err < 0) {
> -				return err;
> +				read_unlock(&head->lock);
> +				goto out;
>  			}
>  		}
> +		read_unlock(&head->lock);
>  
>  		if (err) {
>  			err = reuseport_alloc(sk, any);

What happens if two sockets matching each other reach here ?
There would be no error but the first hashed socket will be
silently dead as lookup stops at the 2nd sk ?

>  			if (err)
> -				return err;
> +				goto out;
>  		}
>  	}
>  
>  	write_lock(&head->lock);
>  	hlist_add_head(&ep->node, &head->chain);
>  	write_unlock(&head->lock);
> -	return 0;
> +out:
> +	return err;
>  }
>  
>  /* Add an endpoint to the hash. Local BH-safe. */
> 
> Dmitry
> What happens if two sockets matching each other reach here ?
Not sure. In general, an attempt to use rather external thing (struct
sctp_hashbucket) to implement extra synchronization for reuse innards
looks somewhat weird.
So let's look at reuseport_add_sock() again. Note extra comments:
int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
{
	struct sock_reuseport *old_reuse, *reuse;

	/* RCU access _outside_ of reuseport_lock'ed section */
	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
		int err = reuseport_alloc(sk2, bind_inany);

		if (err)
			return err;
	}

	spin_lock_bh(&reuseport_lock);
	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
					  lockdep_is_held(&reuseport_lock));
	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
					      lockdep_is_held(&reuseport_lock));
	if (old_reuse && old_reuse->num_closed_socks) {
		/* sk was shutdown()ed before */
		int err = reuseport_resurrect(sk, old_reuse, reuse,
					      reuse->bind_inany);

		spin_unlock_bh(&reuseport_lock);
		return err;
	}

	if (old_reuse && old_reuse->num_socks != 1) {
		spin_unlock_bh(&reuseport_lock);
		return -EBUSY;
	}

	if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
		reuse = reuseport_grow(reuse);
		if (!reuse) {
			spin_unlock_bh(&reuseport_lock);
			return -ENOMEM;
		}
	}

	__reuseport_add_sock(sk, reuse);
	/* RCU access _inside_ of reuseport_lock'ed section */
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

	spin_unlock_bh(&reuseport_lock);

	if (old_reuse)
		call_rcu(&old_reuse->rcu, reuseport_free_rcu);

	return 0;
}
Why is this so? I've tried to extend the reuseport_lock'ed section to include
the first rcu_access_pointer() in question as well, e.g.:
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5a165286e4d8..39a353ab8ff8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -186,16 +186,11 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 	return reuse;
 }
 
-int reuseport_alloc(struct sock *sk, bool bind_inany)
+static int reuseport_alloc_unlocked(struct sock *sk, bool bind_inany)
 {
 	struct sock_reuseport *reuse;
 	int id, ret = 0;
 
-	/* bh lock used since this function call may precede hlist lock in
-	 * soft irq of receive path or setsockopt from process context
-	 */
-	spin_lock_bh(&reuseport_lock);
-
 	/* Allocation attempts can occur concurrently via the setsockopt path
 	 * and the bind/hash path. Nothing to do when we lose the race.
 	 */
@@ -236,8 +231,19 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	reuse->num_socks = 1;
 	reuseport_get_incoming_cpu(sk, reuse);
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
-
 out:
+	return ret;
+}
+
+int reuseport_alloc(struct sock *sk, bool bind_inany)
+{
+	int ret;
+
+	/* bh lock used since this function call may precede hlist lock in
+	 * soft irq of receive path or setsockopt from process context
+	 */
+	spin_lock_bh(&reuseport_lock);
+	ret = reuseport_alloc_unlocked(sk, bind_inany);
 	spin_unlock_bh(&reuseport_lock);
 
 	return ret;
@@ -322,14 +328,17 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 {
 	struct sock_reuseport *old_reuse, *reuse;
 
+	spin_lock_bh(&reuseport_lock);
+
 	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
-		int err = reuseport_alloc(sk2, bind_inany);
+		int err = reuseport_alloc_unlocked(sk2, bind_inany);
 
-		if (err)
+		if (err) {
+			spin_unlock_bh(&reuseport_lock);
 			return err;
+		}
 	}
 
-	spin_lock_bh(&reuseport_lock);
-
 	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
 	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
and this seems to fix the original crash as well.
Dmitry
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Wed, 31 Jul 2024 10:05:37 +0300
> > What happens if two sockets matching each other reach here ?
> 
> Not sure.

If two sockets reach the reuseport_alloc() path, two identical reuseport
groups are created. Then, in __sctp_rcv_lookup_endpoint(), incoming packets
hit only one socket group, the one placed before the other in the hash
bucket, and the other socket no longer receives data and has silently died
from the userspace POV.

The suggested change papers over the problem. reuseport_add_sock() and
reuseport_alloc() must be placed in the same writer critical section.

> In general, an attempt to use rather external thing (struct
> sctp_hashbucket) to implement extra synchronization for reuse innards
> looks somewhat weird.

reuseport_lock is to synchronise operations within the same reuseport
group, but which socket should belong to which group is out of scope.
That must be well-defined and synchronised at the upper level.

I'll post v2 with updated changelog. Thanks!
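For context, the scenario described above boils down to two SCTP listeners
sharing one port via SO_REUSEPORT, so that hashing the second endpoint goes
through the reuseport_add_sock()/reuseport_alloc() path. A minimal userspace
sketch of that setup follows; the port number and loopback address are
arbitrary, and the setup here is sequential, whereas actually hitting the
race needs the two endpoints to be hashed concurrently (e.g. from two
threads or processes).

/* Two SCTP listeners sharing an arbitrary port via SO_REUSEPORT.
 * Illustrative only; error handling is minimal. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int sctp_listener(unsigned short port)
{
	struct sockaddr_in addr;
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	/* SO_REUSEPORT must be set before bind() for the port sharing. */
	if (fd < 0 ||
	    setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 8) < 0)
		return -1;

	return fd;
}

int main(void)
{
	/* Both listeners bind the same addr:port; when the second endpoint
	 * is hashed, the kernel takes the reuseport_add_sock() path
	 * discussed in this thread. */
	int a = sctp_listener(7777);
	int b = sctp_listener(7777);

	if (a < 0 || b < 0) {
		perror("sctp listener");
		return 1;
	}

	pause();	/* keep both endpoints hashed */
	return 0;
}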
On 7/31/24 10:06 PM, Kuniyuki Iwashima wrote:

> reuseport_lock is to synchronise operations within the same
> reuseport group, but which socket should belong to which group
> is out of scope.

Hm... then the lock should be a member of 'struct sock_reuseport'
rather than global, shouldn't it?

Dmitry
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Wed, 31 Jul 2024 22:15:12 +0300
> On 7/31/24 10:06 PM, Kuniyuki Iwashima wrote:
> 
> > reuseport_lock is to synchronise operations within the same
> > reuseport group, but which socket should belong to which group
> > is out of scope.
> 
> Hm... then the lock should be a member of 'struct sock_reuseport'
> rather than global, shouldn't it?

I thought that before, and it would be doable with more complex locking
since we still need another synchronisation for add/detach and alloc.
Such operations are unlikely to be done concurrently or frequently, and
the global lock would be enough.

I'll check that again later, but anyway, it's a different topic for
net-next.
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 17fcaa9b0df9..a8a254a5008e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 	struct sock *sk = ep->base.sk;
 	struct net *net = sock_net(sk);
 	struct sctp_hashbucket *head;
+	int err = 0;
 
 	ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
 	head = &sctp_ep_hashtable[ep->hashent];
 
+	write_lock(&head->lock);
 	if (sk->sk_reuseport) {
 		bool any = sctp_is_ep_boundall(sk);
 		struct sctp_endpoint *ep2;
 		struct list_head *list;
-		int cnt = 0, err = 1;
+		int cnt = 0;
+
+		err = 1;
 
 		list_for_each(list, &ep->base.bind_addr.address_list)
 			cnt++;
@@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
 			if (!err) {
 				err = reuseport_add_sock(sk, sk2, any);
 				if (err)
-					return err;
+					goto out;
 				break;
 			} else if (err < 0) {
-				return err;
+				goto out;
 			}
 		}
 
 		if (err) {
 			err = reuseport_alloc(sk, any);
 			if (err)
-				return err;
+				goto out;
 		}
 	}
 
-	write_lock(&head->lock);
 	hlist_add_head(&ep->node, &head->chain);
+out:
 	write_unlock(&head->lock);
-	return 0;
+	return err;
 }
 
 /* Add an endpoint to the hash. Local BH-safe. */
@@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
 
 	head = &sctp_ep_hashtable[ep->hashent];
 
+	write_lock(&head->lock);
 	if (rcu_access_pointer(sk->sk_reuseport_cb))
 		reuseport_detach_sock(sk);
-
-	write_lock(&head->lock);
 	hlist_del_init(&ep->node);
 	write_unlock(&head->lock);
 }
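The essence of the fix above is that the check for an existing group, the
group allocation, and the hash insertion now all run under the same
write_lock(&head->lock). A rough userspace analogue of that pattern follows;
all names here (bucket, endpoint, group) are made up for illustration and
are not the kernel APIs.

/* Userspace analogue: the lookup for an existing group, the allocation of
 * a new group, and the insertion into the bucket all run under one writer
 * lock, so two concurrent registrations cannot each create their own group.
 * Build with: cc -pthread demo.c */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
	int nsocks;
};

struct endpoint {
	struct group *grp;
	struct endpoint *next;
};

static struct {
	pthread_rwlock_t lock;		/* plays the role of head->lock */
	struct endpoint *chain;
	int groups_allocated;
} bucket = { .lock = PTHREAD_RWLOCK_INITIALIZER };

static void hash_endpoint(struct endpoint *ep)
{
	struct endpoint *ep2;
	struct group *grp = NULL;

	pthread_rwlock_wrlock(&bucket.lock);

	/* Reuse a matching group if an already-hashed endpoint has one. */
	for (ep2 = bucket.chain; ep2; ep2 = ep2->next) {
		if (ep2->grp) {
			grp = ep2->grp;
			break;
		}
	}

	if (!grp) {			/* first endpoint: create the group */
		grp = calloc(1, sizeof(*grp));
		bucket.groups_allocated++;
	}
	grp->nsocks++;
	ep->grp = grp;

	/* Insertion happens in the same critical section as the check. */
	ep->next = bucket.chain;
	bucket.chain = ep;

	pthread_rwlock_unlock(&bucket.lock);
}

static void *worker(void *arg)
{
	hash_endpoint(arg);
	return NULL;
}

int main(void)
{
	struct endpoint a = {0}, b = {0};
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, &a);
	pthread_create(&t2, NULL, worker, &b);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* With the whole sequence under the write lock, exactly one group
	 * exists and both endpoints share it. */
	printf("groups allocated: %d\n", bucket.groups_allocated);
	assert(bucket.groups_allocated == 1 && a.grp == b.grp);
	return 0;
}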