diff mbox series

Bluetooth: fix dangling sco_conn and use-after-free in sco_sock_timeout

Message ID 20220325033028.1.I67f8ad854ac2f48701902bfb34d6e2070011b779@changeid (mailing list archive)
State Superseded
Headers show
Series Bluetooth: fix dangling sco_conn and use-after-free in sco_sock_timeout | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/checkpatch success Checkpatch PASS
tedd_an/gitlint success Gitlint PASS
tedd_an/subjectprefix success PASS
tedd_an/buildkernel success Build Kernel PASS
tedd_an/buildkernel32 success Build Kernel32 PASS
tedd_an/incremental_build success Pass
tedd_an/testrunnersetup success Test Runner Setup PASS
tedd_an/testrunnerl2cap-tester success Total: 40, Passed: 40 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunnerbnep-tester success Total: 1, Passed: 1 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunnermgmt-tester success Total: 493, Passed: 493 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunnerrfcomm-tester success Total: 10, Passed: 10 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunnersco-tester success Total: 12, Passed: 12 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunnersmp-tester success Total: 8, Passed: 8 (100.0%), Failed: 0, Not Run: 0
tedd_an/testrunneruserchan-tester success Total: 4, Passed: 4 (100.0%), Failed: 0, Not Run: 0

Commit Message

Ying Hsu March 25, 2022, 3:30 a.m. UTC
Connecting the same socket twice consecutively in sco_sock_connect()
could lead to a race condition where two sco_conn objects are created
but only one is associated with the socket. If the socket is closed
before the SCO connection is established, the timer associated with the
dangling sco_conn object won't be canceled. As the sock object is being
freed, the use-after-free problem happens when the timer callback
function sco_sock_timeout() accesses the socket. Here's the call trace:

dump_stack+0x107/0x163
? refcount_inc+0x1c/0x7b
print_address_description.constprop.0+0x1c/0x47e
? refcount_inc+0x1c/0x7b
kasan_report+0x13a/0x173
? refcount_inc+0x1c/0x7b
check_memory_region+0x132/0x139
refcount_inc+0x1c/0x7b
sco_sock_timeout+0xb2/0x1ba
process_one_work+0x739/0xbd1
? cancel_delayed_work+0x13f/0x13f
? __raw_spin_lock_init+0xf0/0xf0
? to_kthread+0x59/0x85
worker_thread+0x593/0x70e
kthread+0x346/0x35a
? drain_workqueue+0x31a/0x31a
? kthread_bind+0x4b/0x4b
ret_from_fork+0x1f/0x30

Signed-off-by: Ying Hsu <yinghsu@chromium.org>
Reviewed-by: Joseph Hwang <josephsih@chromium.org>
---
Tested this commit using a C reproducer on qemu-x86_64 for 8 hours.

 net/bluetooth/sco.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

bluez.test.bot@gmail.com March 25, 2022, 4:06 a.m. UTC | #1
This is an automated email; please do not reply to it!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
These are the CI test results for your patch series:
PW Link: https://patchwork.kernel.org/project/bluetooth/list/?series=626220

---Test result---

Test Summary:
CheckPatch                    PASS      1.64 seconds
GitLint                       PASS      1.01 seconds
SubjectPrefix                 PASS      0.82 seconds
BuildKernel                   PASS      34.79 seconds
BuildKernel32                 PASS      31.51 seconds
Incremental Build with patches PASS      41.66 seconds
TestRunner: Setup             PASS      529.53 seconds
TestRunner: l2cap-tester      PASS      16.42 seconds
TestRunner: bnep-tester       PASS      6.76 seconds
TestRunner: mgmt-tester       PASS      108.12 seconds
TestRunner: rfcomm-tester     PASS      8.97 seconds
TestRunner: sco-tester        PASS      8.37 seconds
TestRunner: smp-tester        PASS      8.39 seconds
TestRunner: userchan-tester   PASS      6.94 seconds



---
Regards,
Linux Bluetooth
Luiz Augusto von Dentz March 25, 2022, 6:50 p.m. UTC | #2
Hi Ying,

On Thu, Mar 24, 2022 at 8:31 PM Ying Hsu <yinghsu@chromium.org> wrote:
>
> Connecting the same socket twice consecutively in sco_sock_connect()
> could lead to a race condition where two sco_conn objects are created
> but only one is associated with the socket. If the socket is closed
> before the SCO connection is established, the timer associated with the
> dangling sco_conn object won't be canceled. As the sock object is being
> freed, the use-after-free problem happens when the timer callback
> function sco_sock_timeout() accesses the socket. Here's the call trace:
>
> dump_stack+0x107/0x163
> ? refcount_inc+0x1c/0x7b
> print_address_description.constprop.0+0x1c/0x47e
> ? refcount_inc+0x1c/0x7b
> kasan_report+0x13a/0x173
> ? refcount_inc+0x1c/0x7b
> check_memory_region+0x132/0x139
> refcount_inc+0x1c/0x7b
> sco_sock_timeout+0xb2/0x1ba
> process_one_work+0x739/0xbd1
> ? cancel_delayed_work+0x13f/0x13f
> ? __raw_spin_lock_init+0xf0/0xf0
> ? to_kthread+0x59/0x85
> worker_thread+0x593/0x70e
> kthread+0x346/0x35a
> ? drain_workqueue+0x31a/0x31a
> ? kthread_bind+0x4b/0x4b
> ret_from_fork+0x1f/0x30
>
> Signed-off-by: Ying Hsu <yinghsu@chromium.org>
> Reviewed-by: Joseph Hwang <josephsih@chromium.org>
> ---
> Tested this commit using a C reproducer on qemu-x86_64 for 8 hours.

We should probably add a link or something to the reproducer then. Was
it syzbot? It does have some instructions on how to link its issues.

>  net/bluetooth/sco.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
> index 8eabf41b2993..380c63194736 100644
> --- a/net/bluetooth/sco.c
> +++ b/net/bluetooth/sco.c
> @@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
>             addr->sa_family != AF_BLUETOOTH)
>                 return -EINVAL;
>
> -       if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
> -               return -EBADFD;
> +       lock_sock(sk);
> +       if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
> +               err = -EBADFD;
> +               goto done;
> +       }
>
> -       if (sk->sk_type != SOCK_SEQPACKET)
> -               return -EINVAL;
> +       if (sk->sk_type != SOCK_SEQPACKET) {
> +               err = -EINVAL;
> +               goto done;
> +       }
>
>         hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
> -       if (!hdev)
> -               return -EHOSTUNREACH;
> +       if (!hdev) {
> +               err = -EHOSTUNREACH;
> +               goto done;
> +       }
>         hci_dev_lock(hdev);
>
> -       lock_sock(sk);
> -

Also are we sure we are not introducing a locking hierarchy problem
here? Previously we had hci_dev_lock then sock_lock now it is the
opposite, or perhaps we never want to have them at the same time?

>         /* Set destination address and psm */
>         bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
>
> --
> 2.35.1.1021.g381101b075-goog
>
Ying Hsu March 26, 2022, 6:31 a.m. UTC | #3
Hi Luiz,

I compiled and ran the c-reproducer:
https://syzkaller.appspot.com/x/repro.c?x=152b93e8700000
I will add relevant links in the commit message. Thanks for the reminder.

While fixing the use-after-free problem, I also found a possible
deadlock between sco_sock_connect() and sco_sock_getsockopt():
sco_sock_connect() {
  hci_dev_lock(hdev);
  lock_sock(sk);
}

sco_sock_getsockopt() {
  lock_sock(sk);
  case BT_CODEC:
    hci_dev_lock(hdev);
}

So, adjusting the locking order in sco_sock_connect() can also avoid
the possible deadlock.

Ying

On Sat, Mar 26, 2022 at 2:50 AM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>
> Hi Ying,
>
> On Thu, Mar 24, 2022 at 8:31 PM Ying Hsu <yinghsu@chromium.org> wrote:
> >
> > Connecting the same socket twice consecutively in sco_sock_connect()
> > could lead to a race condition where two sco_conn objects are created
> > but only one is associated with the socket. If the socket is closed
> > before the SCO connection is established, the timer associated with the
> > dangling sco_conn object won't be canceled. As the sock object is being
> > freed, the use-after-free problem happens when the timer callback
> > function sco_sock_timeout() accesses the socket. Here's the call trace:
> >
> > dump_stack+0x107/0x163
> > ? refcount_inc+0x1c/0x7b
> > print_address_description.constprop.0+0x1c/0x47e
> > ? refcount_inc+0x1c/0x7b
> > kasan_report+0x13a/0x173
> > ? refcount_inc+0x1c/0x7b
> > check_memory_region+0x132/0x139
> > refcount_inc+0x1c/0x7b
> > sco_sock_timeout+0xb2/0x1ba
> > process_one_work+0x739/0xbd1
> > ? cancel_delayed_work+0x13f/0x13f
> > ? __raw_spin_lock_init+0xf0/0xf0
> > ? to_kthread+0x59/0x85
> > worker_thread+0x593/0x70e
> > kthread+0x346/0x35a
> > ? drain_workqueue+0x31a/0x31a
> > ? kthread_bind+0x4b/0x4b
> > ret_from_fork+0x1f/0x30
> >
> > Signed-off-by: Ying Hsu <yinghsu@chromium.org>
> > Reviewed-by: Joseph Hwang <josephsih@chromium.org>
> > ---
> > Tested this commit using a C reproducer on qemu-x86_64 for 8 hours.
>
> We should probably add a link or something to the reproducer then, was
> it syzbot? It does have some instructions on how to link its issues.
>
> >  net/bluetooth/sco.c | 21 +++++++++++++--------
> >  1 file changed, 13 insertions(+), 8 deletions(-)
> >
> > diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
> > index 8eabf41b2993..380c63194736 100644
> > --- a/net/bluetooth/sco.c
> > +++ b/net/bluetooth/sco.c
> > @@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
> >             addr->sa_family != AF_BLUETOOTH)
> >                 return -EINVAL;
> >
> > -       if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
> > -               return -EBADFD;
> > +       lock_sock(sk);
> > +       if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
> > +               err = -EBADFD;
> > +               goto done;
> > +       }
> >
> > -       if (sk->sk_type != SOCK_SEQPACKET)
> > -               return -EINVAL;
> > +       if (sk->sk_type != SOCK_SEQPACKET) {
> > +               err = -EINVAL;
> > +               goto done;
> > +       }
> >
> >         hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
> > -       if (!hdev)
> > -               return -EHOSTUNREACH;
> > +       if (!hdev) {
> > +               err = -EHOSTUNREACH;
> > +               goto done;
> > +       }
> >         hci_dev_lock(hdev);
> >
> > -       lock_sock(sk);
> > -
>
> Also are we sure we are not introducing a locking hierarchy problem
> here? Previously we had hci_dev_lock then sock_lock now it is the
> opposite, or perhaps we never want to have them at the same time?
>
> >         /* Set destination address and psm */
> >         bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
> >
> > --
> > 2.35.1.1021.g381101b075-goog
> >
>
>
> --
> Luiz Augusto von Dentz
diff mbox series

Patch

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8eabf41b2993..380c63194736 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -574,19 +574,24 @@  static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
 	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
-	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
-		return -EBADFD;
+	lock_sock(sk);
+	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+		err = -EBADFD;
+		goto done;
+	}
 
-	if (sk->sk_type != SOCK_SEQPACKET)
-		return -EINVAL;
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		err = -EINVAL;
+		goto done;
+	}
 
 	hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
-	if (!hdev)
-		return -EHOSTUNREACH;
+	if (!hdev) {
+		err = -EHOSTUNREACH;
+		goto done;
+	}
 	hci_dev_lock(hdev);
 
-	lock_sock(sk);
-
 	/* Set destination address and psm */
 	bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);