diff mbox series

[v2] Bluetooth: Fix UAF for hci_chan

Message ID 20231009123045.587882-1-william.xuanziyang@huawei.com (mailing list archive)
State New, archived
Headers show
Series [v2] Bluetooth: Fix UAF for hci_chan | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/CheckPatch success CheckPatch PASS
tedd_an/GitLint fail WARNING: I3 - ignore-body-lines: gitlint will be switching from using Python regex 'match' (match beginning) to 'search' (match anywhere) semantics. Please review your ignore-body-lines.regex option accordingly. To remove this warning, set general.regex-style-search=True. More details: https://jorisroovers.github.io/gitlint/configuration/#regex-style-search 5: B1 Line exceeds max length (88>80): "BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231"
tedd_an/SubjectPrefix success Gitlint PASS
tedd_an/BuildKernel success BuildKernel PASS
tedd_an/CheckAllWarning success CheckAllWarning PASS
tedd_an/CheckSparse success CheckSparse PASS
tedd_an/CheckSmatch success CheckSparse PASS
tedd_an/BuildKernel32 success BuildKernel32 PASS
tedd_an/TestRunnerSetup success TestRunnerSetup PASS
tedd_an/TestRunner_l2cap-tester success TestRunner PASS
tedd_an/TestRunner_iso-tester success TestRunner PASS
tedd_an/TestRunner_bnep-tester success TestRunner PASS
tedd_an/TestRunner_mgmt-tester success TestRunner PASS
tedd_an/TestRunner_rfcomm-tester success TestRunner PASS
tedd_an/TestRunner_sco-tester success TestRunner PASS
tedd_an/TestRunner_ioctl-tester success TestRunner PASS
tedd_an/TestRunner_mesh-tester success TestRunner PASS
tedd_an/TestRunner_smp-tester success TestRunner PASS
tedd_an/TestRunner_userchan-tester success TestRunner PASS
tedd_an/IncrementalBuild success Incremental Build PASS

Commit Message

Ziyang Xuan (William) Oct. 9, 2023, 12:30 p.m. UTC
Syzbot reports a UAF problem as follows:

BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
...
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
 print_address_description mm/kasan/report.c:364 [inline]
 print_report+0x163/0x540 mm/kasan/report.c:475
 kasan_report+0x175/0x1b0 mm/kasan/report.c:588
 hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
 l2cap_send_conn_req net/bluetooth/l2cap_core.c:1286 [inline]
 l2cap_start_connection+0x465/0x620 net/bluetooth/l2cap_core.c:1514
 l2cap_conn_start+0xbf3/0x1130 net/bluetooth/l2cap_core.c:1661
 process_one_work kernel/workqueue.c:2630 [inline]
 process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
 worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
 kthread+0x2d3/0x370 kernel/kthread.c:388
 ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
 </TASK>

Allocated by task 27110:
 kasan_save_stack mm/kasan/common.c:45 [inline]
 kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
 ____kasan_kmalloc mm/kasan/common.c:374 [inline]
 __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383
 kmalloc include/linux/slab.h:599 [inline]
 kzalloc include/linux/slab.h:720 [inline]
 hci_chan_create+0xc8/0x300 net/bluetooth/hci_conn.c:2714
 l2cap_conn_add+0x69/0xc10 net/bluetooth/l2cap_core.c:7841
 l2cap_chan_connect+0x61f/0xeb0 net/bluetooth/l2cap_core.c:8053
 bt_6lowpan_connect net/bluetooth/6lowpan.c:894 [inline]
 lowpan_control_write+0x55e/0x850 net/bluetooth/6lowpan.c:1129
 full_proxy_write+0x113/0x1c0 fs/debugfs/file.c:236
 vfs_write+0x286/0xaf0 fs/read_write.c:582
 ksys_write+0x1a0/0x2c0 fs/read_write.c:637
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Freed by task 5067:
 kasan_save_stack mm/kasan/common.c:45 [inline]
 kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
 kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522
 ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236
 kasan_slab_free include/linux/kasan.h:164 [inline]
 slab_free_hook mm/slub.c:1800 [inline]
 slab_free_freelist_hook mm/slub.c:1826 [inline]
 slab_free mm/slub.c:3809 [inline]
 __kmem_cache_free+0x25f/0x3b0 mm/slub.c:3822
 hci_chan_list_flush net/bluetooth/hci_conn.c:2754 [inline]
 hci_conn_cleanup net/bluetooth/hci_conn.c:152 [inline]
 hci_conn_del+0x4f8/0xc40 net/bluetooth/hci_conn.c:1156
 hci_abort_conn_sync+0xa45/0xfe0
 hci_cmd_sync_work+0x228/0x400 net/bluetooth/hci_sync.c:306
 process_one_work kernel/workqueue.c:2630 [inline]
 process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
 worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
 kthread+0x2d3/0x370 kernel/kthread.c:388
 ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304

There are two main reasons for this:
1. In the case of hci_conn_add() concurrency, the handle of
hci_conn allocated through hci_conn_hash_alloc_unset() is not unique.
That will result in getting the wrong hci_conn by
hci_conn_hash_lookup_handle().
2. The processes related to hci_abort_conn() can re-enter for the same
hci_conn because atomic_dec_and_test(&hci_conn->refcnt) is established
in hci_conn_drop(). That will make hci_conn UAF.

To fix this, use ida to manage the allocation of conn->handle, and
add the HCI_CONN_DISC flag to avoid re-entering of the processes related
to hci_abort_conn().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
---
v2:
  - Rebase patch base on newest repo.
  - Remove HCI_CONN_DISC check in lookup interfaces.
---
 include/net/bluetooth/hci_core.h |  6 ++++-
 net/bluetooth/hci_conn.c         | 38 ++++++++++++++++----------------
 net/bluetooth/hci_core.c         |  3 +++
 3 files changed, 27 insertions(+), 20 deletions(-)

Comments

bluez.test.bot@gmail.com Oct. 9, 2023, 1:19 p.m. UTC | #1
This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=791293

---Test result---

Test Summary:
CheckPatch                    PASS      1.09 seconds
GitLint                       FAIL      0.58 seconds
SubjectPrefix                 PASS      0.11 seconds
BuildKernel                   PASS      33.69 seconds
CheckAllWarning               PASS      36.74 seconds
CheckSparse                   PASS      42.62 seconds
CheckSmatch                   PASS      117.82 seconds
BuildKernel32                 PASS      32.91 seconds
TestRunnerSetup               PASS      500.40 seconds
TestRunner_l2cap-tester       PASS      29.86 seconds
TestRunner_iso-tester         PASS      51.52 seconds
TestRunner_bnep-tester        PASS      9.77 seconds
TestRunner_mgmt-tester        PASS      212.13 seconds
TestRunner_rfcomm-tester      PASS      15.62 seconds
TestRunner_sco-tester         PASS      19.26 seconds
TestRunner_ioctl-tester       PASS      17.43 seconds
TestRunner_mesh-tester        PASS      12.78 seconds
TestRunner_smp-tester         PASS      13.81 seconds
TestRunner_userchan-tester    PASS      10.62 seconds
IncrementalBuild              PASS      31.91 seconds

Details
##############################
Test: GitLint - FAIL
Desc: Run gitlint
Output:
[v2] Bluetooth: Fix UAF for hci_chan

WARNING: I3 - ignore-body-lines: gitlint will be switching from using Python regex 'match' (match beginning) to 'search' (match anywhere) semantics. Please review your ignore-body-lines.regex option accordingly. To remove this warning, set general.regex-style-search=True. More details: https://jorisroovers.github.io/gitlint/configuration/#regex-style-search
5: B1 Line exceeds max length (88>80): "BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231"


---
Regards,
Linux Bluetooth
Luiz Augusto von Dentz Oct. 9, 2023, 5:31 p.m. UTC | #2
Hi,

On Mon, Oct 9, 2023 at 5:30 AM Ziyang Xuan
<william.xuanziyang@huawei.com> wrote:
>
> Syzbot reports a UAF problem as follows:
>
> BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
> ...
> Call Trace:
>  <TASK>
>  __dump_stack lib/dump_stack.c:88 [inline]
>  dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
>  print_address_description mm/kasan/report.c:364 [inline]
>  print_report+0x163/0x540 mm/kasan/report.c:475
>  kasan_report+0x175/0x1b0 mm/kasan/report.c:588
>  hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>  l2cap_send_conn_req net/bluetooth/l2cap_core.c:1286 [inline]
>  l2cap_start_connection+0x465/0x620 net/bluetooth/l2cap_core.c:1514
>  l2cap_conn_start+0xbf3/0x1130 net/bluetooth/l2cap_core.c:1661
>  process_one_work kernel/workqueue.c:2630 [inline]
>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>  kthread+0x2d3/0x370 kernel/kthread.c:388
>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>  </TASK>
>
> Allocated by task 27110:
>  kasan_save_stack mm/kasan/common.c:45 [inline]
>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>  ____kasan_kmalloc mm/kasan/common.c:374 [inline]
>  __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383
>  kmalloc include/linux/slab.h:599 [inline]
>  kzalloc include/linux/slab.h:720 [inline]
>  hci_chan_create+0xc8/0x300 net/bluetooth/hci_conn.c:2714
>  l2cap_conn_add+0x69/0xc10 net/bluetooth/l2cap_core.c:7841
>  l2cap_chan_connect+0x61f/0xeb0 net/bluetooth/l2cap_core.c:8053
>  bt_6lowpan_connect net/bluetooth/6lowpan.c:894 [inline]
>  lowpan_control_write+0x55e/0x850 net/bluetooth/6lowpan.c:1129
>  full_proxy_write+0x113/0x1c0 fs/debugfs/file.c:236
>  vfs_write+0x286/0xaf0 fs/read_write.c:582
>  ksys_write+0x1a0/0x2c0 fs/read_write.c:637
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> Freed by task 5067:
>  kasan_save_stack mm/kasan/common.c:45 [inline]
>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>  kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522
>  ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236
>  kasan_slab_free include/linux/kasan.h:164 [inline]
>  slab_free_hook mm/slub.c:1800 [inline]
>  slab_free_freelist_hook mm/slub.c:1826 [inline]
>  slab_free mm/slub.c:3809 [inline]
>  __kmem_cache_free+0x25f/0x3b0 mm/slub.c:3822
>  hci_chan_list_flush net/bluetooth/hci_conn.c:2754 [inline]
>  hci_conn_cleanup net/bluetooth/hci_conn.c:152 [inline]
>  hci_conn_del+0x4f8/0xc40 net/bluetooth/hci_conn.c:1156
>  hci_abort_conn_sync+0xa45/0xfe0
>  hci_cmd_sync_work+0x228/0x400 net/bluetooth/hci_sync.c:306
>  process_one_work kernel/workqueue.c:2630 [inline]
>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>  kthread+0x2d3/0x370 kernel/kthread.c:388
>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>
> There are two main reasons for this:
> 1. In the case of hci_conn_add() concurrency, the handle of
> hci_conn allocated through hci_conn_hash_alloc_unset() is not unique.
> That will result in getting the wrong hci_conn by
> hci_conn_hash_lookup_handle().
> 2. The processes related to hci_abort_conn() can re-enter for the same
> hci_conn because atomic_dec_and_test(&hci_conn->refcnt) is established
> in hci_conn_drop(). That will make hci_conn UAF.
>
> To fix this, use ida to manage the allocation of conn->handle, and
> add the HCI_CONN_DISC flag to avoid re-entering of the processes related
> to hci_abort_conn().
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> ---
> v2:
>   - Rebase patch base on newest repo.
>   - Remove HCI_CONN_DISC check in lookup interfaces.
> ---
>  include/net/bluetooth/hci_core.h |  6 ++++-
>  net/bluetooth/hci_conn.c         | 38 ++++++++++++++++----------------
>  net/bluetooth/hci_core.c         |  3 +++
>  3 files changed, 27 insertions(+), 20 deletions(-)
>
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index f36c1fd5d64e..4f44f2bffa57 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -350,6 +350,8 @@ struct hci_dev {
>         struct list_head list;
>         struct mutex    lock;
>
> +       struct ida      unset_handle_ida;
> +
>         const char      *name;
>         unsigned long   flags;
>         __u16           id;
> @@ -969,6 +971,7 @@ enum {
>         HCI_CONN_AUTH_INITIATOR,
>         HCI_CONN_DROP,
>         HCI_CONN_CANCEL,
> +       HCI_CONN_DISC,

Can't we just use HCI_CONN_DROP instead of introducing yet another flag?

>         HCI_CONN_PARAM_REMOVAL_PEND,
>         HCI_CONN_NEW_LINK_KEY,
>         HCI_CONN_SCANNING,
> @@ -1543,7 +1546,8 @@ static inline void hci_conn_drop(struct hci_conn *conn)
>  {
>         BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
>
> -       if (atomic_dec_and_test(&conn->refcnt)) {
> +       if (atomic_dec_and_test(&conn->refcnt) &&
> +           !test_bit(HCI_CONN_DISC, &conn->flags)) {
>                 unsigned long timeo;
>
>                 switch (conn->type) {
> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> index 974631e652c1..f87281b4386f 100644
> --- a/net/bluetooth/hci_conn.c
> +++ b/net/bluetooth/hci_conn.c
> @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
>
>         hci_conn_hash_del(hdev, conn);
>
> +       if (HCI_CONN_HANDLE_UNSET(conn->handle))
> +               ida_free(&hdev->unset_handle_ida, conn->handle);
> +
>         if (conn->cleanup)
>                 conn->cleanup(conn);
>
> @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
>         hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
>  }
>
> -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
>  {
> -       struct hci_conn_hash *h = &hdev->conn_hash;
> -       struct hci_conn  *c;
> -       u16 handle = HCI_CONN_HANDLE_MAX + 1;
> -
> -       rcu_read_lock();
> -
> -       list_for_each_entry_rcu(c, &h->list, list) {
> -               /* Find the first unused handle */
> -               if (handle == 0xffff || c->handle != handle)
> -                       break;
> -               handle++;
> -       }
> -       rcu_read_unlock();
> -
> -       return handle;
> +       return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
> +                              U16_MAX, GFP_ATOMIC);
>  }
>
>  struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
>                               u8 role)
>  {
>         struct hci_conn *conn;
> +       int handle;
>
>         BT_DBG("%s dst %pMR", hdev->name, dst);
>
> @@ -961,7 +952,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
>
>         bacpy(&conn->dst, dst);
>         bacpy(&conn->src, &hdev->bdaddr);
> -       conn->handle = hci_conn_hash_alloc_unset(hdev);
> +       handle = hci_conn_hash_alloc_unset(hdev);
> +       if (unlikely(handle < 0)) {
> +               kfree(conn);
> +               return NULL;
> +       }
> +       conn->handle = handle;
>         conn->hdev  = hdev;
>         conn->type  = type;
>         conn->role  = role;
> @@ -2933,6 +2929,7 @@ static int abort_conn_sync(struct hci_dev *hdev, void *data)
>  int hci_abort_conn(struct hci_conn *conn, u8 reason)
>  {
>         struct hci_dev *hdev = conn->hdev;
> +       int ret;
>
>         /* If abort_reason has already been set it means the connection is
>          * already being aborted so don't attempt to overwrite it.
> @@ -2961,6 +2958,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>                 }
>         }
>
> -       return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
> -                                 NULL);
> +       ret = hci_cmd_sync_queue(hdev, abort_conn_sync,
> +                                UINT_PTR(conn->handle), NULL);
> +       if (!ret)
> +               set_bit(HCI_CONN_DISC, &conn->flags);
> +       return ret;
>  }
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 195aea2198a9..65601aa52e0d 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -2535,6 +2535,8 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
>         mutex_init(&hdev->lock);
>         mutex_init(&hdev->req_lock);
>
> +       ida_init(&hdev->unset_handle_ida);
> +
>         INIT_LIST_HEAD(&hdev->mesh_pending);
>         INIT_LIST_HEAD(&hdev->mgmt_pending);
>         INIT_LIST_HEAD(&hdev->reject_list);
> @@ -2789,6 +2791,7 @@ void hci_release_dev(struct hci_dev *hdev)
>         hci_codec_list_clear(&hdev->local_codecs);
>         hci_dev_unlock(hdev);
>
> +       ida_destroy(&hdev->unset_handle_ida);
>         ida_simple_remove(&hci_index_ida, hdev->id);
>         kfree_skb(hdev->sent_cmd);
>         kfree_skb(hdev->recv_event);
> --
> 2.25.1
>
Pauli Virtanen Oct. 9, 2023, 8:37 p.m. UTC | #3
Hi,

ma, 2023-10-09 kello 20:30 +0800, Ziyang Xuan kirjoitti:
> Syzbot reports a UAF problem as follows:
> 
> BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
> ...
> Call Trace:
>  <TASK>
>  __dump_stack lib/dump_stack.c:88 [inline]
>  dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
>  print_address_description mm/kasan/report.c:364 [inline]
>  print_report+0x163/0x540 mm/kasan/report.c:475
>  kasan_report+0x175/0x1b0 mm/kasan/report.c:588
>  hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>  l2cap_send_conn_req net/bluetooth/l2cap_core.c:1286 [inline]
>  l2cap_start_connection+0x465/0x620 net/bluetooth/l2cap_core.c:1514
>  l2cap_conn_start+0xbf3/0x1130 net/bluetooth/l2cap_core.c:1661
>  process_one_work kernel/workqueue.c:2630 [inline]
>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>  kthread+0x2d3/0x370 kernel/kthread.c:388
>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>  </TASK>
> 
> Allocated by task 27110:
>  kasan_save_stack mm/kasan/common.c:45 [inline]
>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>  ____kasan_kmalloc mm/kasan/common.c:374 [inline]
>  __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383
>  kmalloc include/linux/slab.h:599 [inline]
>  kzalloc include/linux/slab.h:720 [inline]
>  hci_chan_create+0xc8/0x300 net/bluetooth/hci_conn.c:2714
>  l2cap_conn_add+0x69/0xc10 net/bluetooth/l2cap_core.c:7841
>  l2cap_chan_connect+0x61f/0xeb0 net/bluetooth/l2cap_core.c:8053
>  bt_6lowpan_connect net/bluetooth/6lowpan.c:894 [inline]
>  lowpan_control_write+0x55e/0x850 net/bluetooth/6lowpan.c:1129
>  full_proxy_write+0x113/0x1c0 fs/debugfs/file.c:236
>  vfs_write+0x286/0xaf0 fs/read_write.c:582
>  ksys_write+0x1a0/0x2c0 fs/read_write.c:637
>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> Freed by task 5067:
>  kasan_save_stack mm/kasan/common.c:45 [inline]
>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>  kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522
>  ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236
>  kasan_slab_free include/linux/kasan.h:164 [inline]
>  slab_free_hook mm/slub.c:1800 [inline]
>  slab_free_freelist_hook mm/slub.c:1826 [inline]
>  slab_free mm/slub.c:3809 [inline]
>  __kmem_cache_free+0x25f/0x3b0 mm/slub.c:3822
>  hci_chan_list_flush net/bluetooth/hci_conn.c:2754 [inline]
>  hci_conn_cleanup net/bluetooth/hci_conn.c:152 [inline]
>  hci_conn_del+0x4f8/0xc40 net/bluetooth/hci_conn.c:1156
>  hci_abort_conn_sync+0xa45/0xfe0
>  hci_cmd_sync_work+0x228/0x400 net/bluetooth/hci_sync.c:306
>  process_one_work kernel/workqueue.c:2630 [inline]
>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>  kthread+0x2d3/0x370 kernel/kthread.c:388
>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
> 
> There are two main reasons for this:
> 1. In the case of hci_conn_add() concurrency, the handle of
> hci_conn allocated through hci_conn_hash_alloc_unset() is not unique.
> That will result in getting the wrong hci_conn by
> hci_conn_hash_lookup_handle().
> 2. The processes related to hci_abort_conn() can re-enter for the same
> hci_conn because atomic_dec_and_test(&hci_conn->refcnt) is established
> in hci_conn_drop(). That will make hci_conn UAF.
> 
> To fix this, use ida to manage the allocation of conn->handle, and
> add the HCI_CONN_DISC flag to avoid re-entering of the processes related
> to hci_abort_conn().
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> ---
> v2:
>   - Rebase patch base on newest repo.
>   - Remove HCI_CONN_DISC check in lookup interfaces.
> ---
>  include/net/bluetooth/hci_core.h |  6 ++++-
>  net/bluetooth/hci_conn.c         | 38 ++++++++++++++++----------------
>  net/bluetooth/hci_core.c         |  3 +++
>  3 files changed, 27 insertions(+), 20 deletions(-)
> 
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index f36c1fd5d64e..4f44f2bffa57 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -350,6 +350,8 @@ struct hci_dev {
>  	struct list_head list;
>  	struct mutex	lock;
>  
> +	struct ida	unset_handle_ida;
> +
>  	const char	*name;
>  	unsigned long	flags;
>  	__u16		id;
> @@ -969,6 +971,7 @@ enum {
>  	HCI_CONN_AUTH_INITIATOR,
>  	HCI_CONN_DROP,
>  	HCI_CONN_CANCEL,
> +	HCI_CONN_DISC,
>  	HCI_CONN_PARAM_REMOVAL_PEND,
>  	HCI_CONN_NEW_LINK_KEY,
>  	HCI_CONN_SCANNING,
> @@ -1543,7 +1546,8 @@ static inline void hci_conn_drop(struct hci_conn *conn)
>  {
>  	BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
>  
> -	if (atomic_dec_and_test(&conn->refcnt)) {
> +	if (atomic_dec_and_test(&conn->refcnt) &&
> +	    !test_bit(HCI_CONN_DISC, &conn->flags)) {
>  		unsigned long timeo;

hci_abort_conn already checks if conn->abort_reason was already set, to
avoid queuing abort for the same conn again. Does this flag not try to
do the same thing?

There are potential races in hci_sync vs. handle reuse since the queued
abort is not canceled if the conn is deleted by something else. But now
I'm not sure syzbot hits this race.

>  
>  		switch (conn->type) {
> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> index 974631e652c1..f87281b4386f 100644
> --- a/net/bluetooth/hci_conn.c
> +++ b/net/bluetooth/hci_conn.c
> @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
>  
>  	hci_conn_hash_del(hdev, conn);
>  
> +	if (HCI_CONN_HANDLE_UNSET(conn->handle))
> +		ida_free(&hdev->unset_handle_ida, conn->handle);
> +
>  	if (conn->cleanup)
>  		conn->cleanup(conn);
>  
> @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
>  	hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
>  }
>  
> -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
>  {
> -	struct hci_conn_hash *h = &hdev->conn_hash;
> -	struct hci_conn  *c;
> -	u16 handle = HCI_CONN_HANDLE_MAX + 1;
> -
> -	rcu_read_lock();
> -
> -	list_for_each_entry_rcu(c, &h->list, list) {
> -		/* Find the first unused handle */
> -		if (handle == 0xffff || c->handle != handle)
> -			break;
> -		handle++;
> -	}
> -	rcu_read_unlock();

The original hci_conn_hash_alloc_unset seems to have wrong logic. It'll
e.g. always return HCI_CONN_HANDLE_MAX+1 except if the first conn in
conn_hash (which is not in particular order) has that handle...

The code paths adding/deleting conns or entering here / setting handles
should be holding hdev->lock, so probably no concurrency issue in it.

Is syzbot happy with just this handle allocation fixed?

> -
> -	return handle;
> +	return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
> +			       U16_MAX, GFP_ATOMIC);
>  }
>  
>  struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
>  			      u8 role)
>  {
>  	struct hci_conn *conn;
> +	int handle;
>  
>  	BT_DBG("%s dst %pMR", hdev->name, dst);
>  
> @@ -961,7 +952,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
>  
>  	bacpy(&conn->dst, dst);
>  	bacpy(&conn->src, &hdev->bdaddr);
> -	conn->handle = hci_conn_hash_alloc_unset(hdev);
> +	handle = hci_conn_hash_alloc_unset(hdev);
> +	if (unlikely(handle < 0)) {
> +		kfree(conn);
> +		return NULL;
> +	}
> +	conn->handle = handle;
>  	conn->hdev  = hdev;
>  	conn->type  = type;
>  	conn->role  = role;
> @@ -2933,6 +2929,7 @@ static int abort_conn_sync(struct hci_dev *hdev, void *data)
>  int hci_abort_conn(struct hci_conn *conn, u8 reason)
>  {
>  	struct hci_dev *hdev = conn->hdev;
> +	int ret;
>  
>  	/* If abort_reason has already been set it means the connection is
>  	 * already being aborted so don't attempt to overwrite it.
> @@ -2961,6 +2958,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>  		}
>  	}
>  
> -	return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
> -				  NULL);
> +	ret = hci_cmd_sync_queue(hdev, abort_conn_sync,
> +				 UINT_PTR(conn->handle), NULL);
> +	if (!ret)
> +		set_bit(HCI_CONN_DISC, &conn->flags);
> +	return ret;
>  }
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 195aea2198a9..65601aa52e0d 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -2535,6 +2535,8 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
>  	mutex_init(&hdev->lock);
>  	mutex_init(&hdev->req_lock);
>  
> +	ida_init(&hdev->unset_handle_ida);
> +
>  	INIT_LIST_HEAD(&hdev->mesh_pending);
>  	INIT_LIST_HEAD(&hdev->mgmt_pending);
>  	INIT_LIST_HEAD(&hdev->reject_list);
> @@ -2789,6 +2791,7 @@ void hci_release_dev(struct hci_dev *hdev)
>  	hci_codec_list_clear(&hdev->local_codecs);
>  	hci_dev_unlock(hdev);
>  
> +	ida_destroy(&hdev->unset_handle_ida);
>  	ida_simple_remove(&hci_index_ida, hdev->id);
>  	kfree_skb(hdev->sent_cmd);
>  	kfree_skb(hdev->recv_event);
Ziyang Xuan (William) Oct. 10, 2023, 12:31 p.m. UTC | #4
> Hi,
> 
> On Mon, Oct 9, 2023 at 5:30 AM Ziyang Xuan
> <william.xuanziyang@huawei.com> wrote:
>>
>> Syzbot reports a UAF problem as follows:
>>
>> BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>> ...
>> Call Trace:
>>  <TASK>
>>  __dump_stack lib/dump_stack.c:88 [inline]
>>  dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
>>  print_address_description mm/kasan/report.c:364 [inline]
>>  print_report+0x163/0x540 mm/kasan/report.c:475
>>  kasan_report+0x175/0x1b0 mm/kasan/report.c:588
>>  hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>>  l2cap_send_conn_req net/bluetooth/l2cap_core.c:1286 [inline]
>>  l2cap_start_connection+0x465/0x620 net/bluetooth/l2cap_core.c:1514
>>  l2cap_conn_start+0xbf3/0x1130 net/bluetooth/l2cap_core.c:1661
>>  process_one_work kernel/workqueue.c:2630 [inline]
>>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>>  kthread+0x2d3/0x370 kernel/kthread.c:388
>>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>>  </TASK>
>>
>> Allocated by task 27110:
>>  kasan_save_stack mm/kasan/common.c:45 [inline]
>>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>>  ____kasan_kmalloc mm/kasan/common.c:374 [inline]
>>  __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383
>>  kmalloc include/linux/slab.h:599 [inline]
>>  kzalloc include/linux/slab.h:720 [inline]
>>  hci_chan_create+0xc8/0x300 net/bluetooth/hci_conn.c:2714
>>  l2cap_conn_add+0x69/0xc10 net/bluetooth/l2cap_core.c:7841
>>  l2cap_chan_connect+0x61f/0xeb0 net/bluetooth/l2cap_core.c:8053
>>  bt_6lowpan_connect net/bluetooth/6lowpan.c:894 [inline]
>>  lowpan_control_write+0x55e/0x850 net/bluetooth/6lowpan.c:1129
>>  full_proxy_write+0x113/0x1c0 fs/debugfs/file.c:236
>>  vfs_write+0x286/0xaf0 fs/read_write.c:582
>>  ksys_write+0x1a0/0x2c0 fs/read_write.c:637
>>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>>
>> Freed by task 5067:
>>  kasan_save_stack mm/kasan/common.c:45 [inline]
>>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>>  kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522
>>  ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236
>>  kasan_slab_free include/linux/kasan.h:164 [inline]
>>  slab_free_hook mm/slub.c:1800 [inline]
>>  slab_free_freelist_hook mm/slub.c:1826 [inline]
>>  slab_free mm/slub.c:3809 [inline]
>>  __kmem_cache_free+0x25f/0x3b0 mm/slub.c:3822
>>  hci_chan_list_flush net/bluetooth/hci_conn.c:2754 [inline]
>>  hci_conn_cleanup net/bluetooth/hci_conn.c:152 [inline]
>>  hci_conn_del+0x4f8/0xc40 net/bluetooth/hci_conn.c:1156
>>  hci_abort_conn_sync+0xa45/0xfe0
>>  hci_cmd_sync_work+0x228/0x400 net/bluetooth/hci_sync.c:306
>>  process_one_work kernel/workqueue.c:2630 [inline]
>>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>>  kthread+0x2d3/0x370 kernel/kthread.c:388
>>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>>
>> There are two main reasons for this:
>> 1. In the case of hci_conn_add() concurrency, the handle of
>> hci_conn allocated through hci_conn_hash_alloc_unset() is not unique.
>> That will result in getting the wrong hci_conn by
>> hci_conn_hash_lookup_handle().
>> 2. The processes related to hci_abort_conn() can re-enter for the same
>> hci_conn because atomic_dec_and_test(&hci_conn->refcnt) is established
>> in hci_conn_drop(). That will make hci_conn UAF.
>>
>> To fix this, use ida to manage the allocation of conn->handle, and
>> add the HCI_CONN_DISC flag to avoid re-entering of the processes related
>> to hci_abort_conn().
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>> ---
>> v2:
>>   - Rebase patch base on newest repo.
>>   - Remove HCI_CONN_DISC check in lookup interfaces.
>> ---
>>  include/net/bluetooth/hci_core.h |  6 ++++-
>>  net/bluetooth/hci_conn.c         | 38 ++++++++++++++++----------------
>>  net/bluetooth/hci_core.c         |  3 +++
>>  3 files changed, 27 insertions(+), 20 deletions(-)
>>
>> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
>> index f36c1fd5d64e..4f44f2bffa57 100644
>> --- a/include/net/bluetooth/hci_core.h
>> +++ b/include/net/bluetooth/hci_core.h
>> @@ -350,6 +350,8 @@ struct hci_dev {
>>         struct list_head list;
>>         struct mutex    lock;
>>
>> +       struct ida      unset_handle_ida;
>> +
>>         const char      *name;
>>         unsigned long   flags;
>>         __u16           id;
>> @@ -969,6 +971,7 @@ enum {
>>         HCI_CONN_AUTH_INITIATOR,
>>         HCI_CONN_DROP,
>>         HCI_CONN_CANCEL,
>> +       HCI_CONN_DISC,
> 
> Can't we just use HCI_CONN_DROP instead of introducing yet another flag?

I don't think this works. When the processes related to hci_abort_conn() be re-entered,
hci_conn can occur UAF in hci_proto_disconn_ind().

syzbot:
https://syzkaller.appspot.com/bug?extid=a0c80b06ae2cb8895bc4
Reproducer:
https://syzkaller.appspot.com/text?tag=ReproC&x=103036d6680000
config file:
https://syzkaller.appspot.com/text?tag=KernelConfig&x=bf95903690e3be2d

Ziyang Xuan
> 
>>         HCI_CONN_PARAM_REMOVAL_PEND,
Ziyang Xuan (William) Oct. 10, 2023, 12:49 p.m. UTC | #5
> Hi,
> 
> ma, 2023-10-09 kello 20:30 +0800, Ziyang Xuan kirjoitti:
>> Syzbot reports a UAF problem as follows:
>>
>> BUG: KASAN: slab-use-after-free in hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>> ...
>> Call Trace:
>>  <TASK>
>>  __dump_stack lib/dump_stack.c:88 [inline]
>>  dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
>>  print_address_description mm/kasan/report.c:364 [inline]
>>  print_report+0x163/0x540 mm/kasan/report.c:475
>>  kasan_report+0x175/0x1b0 mm/kasan/report.c:588
>>  hci_send_acl+0x48/0xe70 net/bluetooth/hci_core.c:3231
>>  l2cap_send_conn_req net/bluetooth/l2cap_core.c:1286 [inline]
>>  l2cap_start_connection+0x465/0x620 net/bluetooth/l2cap_core.c:1514
>>  l2cap_conn_start+0xbf3/0x1130 net/bluetooth/l2cap_core.c:1661
>>  process_one_work kernel/workqueue.c:2630 [inline]
>>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>>  kthread+0x2d3/0x370 kernel/kthread.c:388
>>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>>  </TASK>
>>
>> Allocated by task 27110:
>>  kasan_save_stack mm/kasan/common.c:45 [inline]
>>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>>  ____kasan_kmalloc mm/kasan/common.c:374 [inline]
>>  __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383
>>  kmalloc include/linux/slab.h:599 [inline]
>>  kzalloc include/linux/slab.h:720 [inline]
>>  hci_chan_create+0xc8/0x300 net/bluetooth/hci_conn.c:2714
>>  l2cap_conn_add+0x69/0xc10 net/bluetooth/l2cap_core.c:7841
>>  l2cap_chan_connect+0x61f/0xeb0 net/bluetooth/l2cap_core.c:8053
>>  bt_6lowpan_connect net/bluetooth/6lowpan.c:894 [inline]
>>  lowpan_control_write+0x55e/0x850 net/bluetooth/6lowpan.c:1129
>>  full_proxy_write+0x113/0x1c0 fs/debugfs/file.c:236
>>  vfs_write+0x286/0xaf0 fs/read_write.c:582
>>  ksys_write+0x1a0/0x2c0 fs/read_write.c:637
>>  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
>>  do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
>>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>>
>> Freed by task 5067:
>>  kasan_save_stack mm/kasan/common.c:45 [inline]
>>  kasan_set_track+0x4f/0x70 mm/kasan/common.c:52
>>  kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522
>>  ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236
>>  kasan_slab_free include/linux/kasan.h:164 [inline]
>>  slab_free_hook mm/slub.c:1800 [inline]
>>  slab_free_freelist_hook mm/slub.c:1826 [inline]
>>  slab_free mm/slub.c:3809 [inline]
>>  __kmem_cache_free+0x25f/0x3b0 mm/slub.c:3822
>>  hci_chan_list_flush net/bluetooth/hci_conn.c:2754 [inline]
>>  hci_conn_cleanup net/bluetooth/hci_conn.c:152 [inline]
>>  hci_conn_del+0x4f8/0xc40 net/bluetooth/hci_conn.c:1156
>>  hci_abort_conn_sync+0xa45/0xfe0
>>  hci_cmd_sync_work+0x228/0x400 net/bluetooth/hci_sync.c:306
>>  process_one_work kernel/workqueue.c:2630 [inline]
>>  process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703
>>  worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784
>>  kthread+0x2d3/0x370 kernel/kthread.c:388
>>  ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147
>>  ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
>>
>> There are two main reasons for this:
>> 1. In the case of hci_conn_add() concurrency, the handle of
>> hci_conn allocated through hci_conn_hash_alloc_unset() is not unique.
>> That will result in getting the wrong hci_conn by
>> hci_conn_hash_lookup_handle().
>> 2. The processes related to hci_abort_conn() can re-enter for the same
>> hci_conn because atomic_dec_and_test(&hci_conn->refcnt) is established
>> in hci_conn_drop(). That will make hci_conn UAF.
>>
>> To fix this, use ida to manage the allocation of conn->handle, and
>> add the HCI_CONN_DISC flag to avoid re-entering of the processes related
>> to hci_abort_conn().
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>> ---
>> v2:
>>   - Rebase patch base on newest repo.
>>   - Remove HCI_CONN_DISC check in lookup interfaces.
>> ---
>>  include/net/bluetooth/hci_core.h |  6 ++++-
>>  net/bluetooth/hci_conn.c         | 38 ++++++++++++++++----------------
>>  net/bluetooth/hci_core.c         |  3 +++
>>  3 files changed, 27 insertions(+), 20 deletions(-)
>>
>> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
>> index f36c1fd5d64e..4f44f2bffa57 100644
>> --- a/include/net/bluetooth/hci_core.h
>> +++ b/include/net/bluetooth/hci_core.h
>> @@ -350,6 +350,8 @@ struct hci_dev {
>>  	struct list_head list;
>>  	struct mutex	lock;
>>  
>> +	struct ida	unset_handle_ida;
>> +
>>  	const char	*name;
>>  	unsigned long	flags;
>>  	__u16		id;
>> @@ -969,6 +971,7 @@ enum {
>>  	HCI_CONN_AUTH_INITIATOR,
>>  	HCI_CONN_DROP,
>>  	HCI_CONN_CANCEL,
>> +	HCI_CONN_DISC,
>>  	HCI_CONN_PARAM_REMOVAL_PEND,
>>  	HCI_CONN_NEW_LINK_KEY,
>>  	HCI_CONN_SCANNING,
>> @@ -1543,7 +1546,8 @@ static inline void hci_conn_drop(struct hci_conn *conn)
>>  {
>>  	BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
>>  
>> -	if (atomic_dec_and_test(&conn->refcnt)) {
>> +	if (atomic_dec_and_test(&conn->refcnt) &&
>> +	    !test_bit(HCI_CONN_DISC, &conn->flags)) {
>>  		unsigned long timeo;
> 
> hci_abort_conn already checks if conn->abort_reason was already set, to
> avoid queuing abort for the same conn again. Does this flag not try to
> do the same thing?

That is not enough. hci_conn occur UAF in hci_proto_disconn_ind() before enter
hci_abort_conn().

> 
> There are potential races in hci_sync vs. handle reuse since the queued
> abort is not canceled if the conn is deleted by something else. But now
> I'm not sure syzbot hits this race.

Sorry, can you give a specific scenario. I can't understand exactly what you mean.

> 
>>  
>>  		switch (conn->type) {
>> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
>> index 974631e652c1..f87281b4386f 100644
>> --- a/net/bluetooth/hci_conn.c
>> +++ b/net/bluetooth/hci_conn.c
>> @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
>>  
>>  	hci_conn_hash_del(hdev, conn);
>>  
>> +	if (HCI_CONN_HANDLE_UNSET(conn->handle))
>> +		ida_free(&hdev->unset_handle_ida, conn->handle);
>> +
>>  	if (conn->cleanup)
>>  		conn->cleanup(conn);
>>  
>> @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
>>  	hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
>>  }
>>  
>> -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
>> +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
>>  {
>> -	struct hci_conn_hash *h = &hdev->conn_hash;
>> -	struct hci_conn  *c;
>> -	u16 handle = HCI_CONN_HANDLE_MAX + 1;
>> -
>> -	rcu_read_lock();
>> -
>> -	list_for_each_entry_rcu(c, &h->list, list) {
>> -		/* Find the first unused handle */
>> -		if (handle == 0xffff || c->handle != handle)
>> -			break;
>> -		handle++;
>> -	}
>> -	rcu_read_unlock();
> 
> The original hci_conn_hash_alloc_unset seems to have wrong logic. It'll
> e.g. always return HCI_CONN_HANDLE_MAX+1 except if the first conn in
> conn_hash (which is not in particular order) has that handle...
> 
> The code paths adding/deleting conns or entering here / setting handles
> should be holding hdev->lock, so probably no concurrency issue in it.
> 
> Is syzbot happy with just this handle allocation fixed?

Just fix handle, hci_conn occur UAF in hci_proto_disconn_ind() within hci_conn_timeout() process.

> 
>> -
>> -	return handle;
>> +	return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
>> +			       U16_MAX, GFP_ATOMIC);
>>  }
>>  
>
Pauli Virtanen Oct. 10, 2023, 6:32 p.m. UTC | #6
Hi,

ti, 2023-10-10 kello 20:49 +0800, Ziyang Xuan (William)
kirjoitti:[clip]
> > > 
> > > -	if (atomic_dec_and_test(&conn->refcnt)) {
> > > +	if (atomic_dec_and_test(&conn->refcnt) &&
> > > +	    !test_bit(HCI_CONN_DISC, &conn->flags)) {
> > >  		unsigned long timeo;
> > 
> > hci_abort_conn already checks if conn->abort_reason was already set, to
> > avoid queuing abort for the same conn again. Does this flag not try to
> > do the same thing?
> 
> That is not enough. hci_conn occur UAF in hci_proto_disconn_ind() before enter
> hci_abort_conn().

Thanks, this was not clear to me from the patch.

So the problem is that the cancel_delayed_work_sync(&conn->disc_work)
in hci_conn_del doesn't help in a few cases:

1. [task A] hci_conn_del(conn) entered
2. [A] cancel_delayed_work_sync(conn->disc_work)
3. [B] concurrent hci_conn_drop(conn) queues disc_work again
4. [A] hci_conn_del finishes
5. UAF when disc_work runs

or without needing concurrency

1. hci_conn_del(conn) entered and finishes
2. hci_conn_drop(conn); hci_conn_put(conn); as done in several places

?

The hci_conn_del here is not necessarily from hci_abort_conn. Should
the atomic flag be set in hci_conn_del before
cancel_delayed_work_sync(&conn->disc_work) to catch possible other
cases?

> > There are potential races in hci_sync vs. handle reuse since the queued
> > abort is not canceled if the conn is deleted by something else. But now
> > I'm not sure syzbot hits this race.
> 
> Sorry, can you give a specific scenario. I can't understand exactly what you mean.

As noted in the other patch:

1. hci_conn_abort(conn)

2. hci_cmd_sync_queue(hdev,abort_conn_sync,UINT_PTR(conn->handle))

3. something else (e.g. HCI event) deletes conn

4. something else (e.g. HCI event) creates conn2, with same handle
   value

5. Queued abort_conn_sync executes. It needs to be delayed enough,
   but doesn't need to be concurrent.

6. abort_conn_sync uses queued handle value, finds conn2 (not conn)

7. hci_abort_conn_sync(conn2, conn2->abort_reason)

8. Calling hci_abort_conn_sync with reason == 0 causes UAF

The UAF is because reason==0 is passed to l2cap_disconn_ind which does
not signal disconnection to the L2CAP layer, hci_abort_conn_sync
deletes conn immediately after that, and later on L2CAP tries to access
stale conn pointer.

> > > 
> > >  		switch (conn->type) {
> > > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> > > index 974631e652c1..f87281b4386f 100644
> > > --- a/net/bluetooth/hci_conn.c
> > > +++ b/net/bluetooth/hci_conn.c
> > > @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
> > >  
> > > 
> > > 
> > > 
> > >  	hci_conn_hash_del(hdev, conn);
> > >  
> > > 
> > > 
> > > 
> > > +	if (HCI_CONN_HANDLE_UNSET(conn->handle))
> > > +		ida_free(&hdev->unset_handle_ida, conn->handle);
> > > +
> > >  	if (conn->cleanup)
> > >  		conn->cleanup(conn);
> > >  
> > > 
> > > 
> > > 
> > > @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
> > >  	hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
> > >  }
> > >  
> > > 
> > > 
> > > 
> > > -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > > +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > >  {
> > > -	struct hci_conn_hash *h = &hdev->conn_hash;
> > > -	struct hci_conn  *c;
> > > -	u16 handle = HCI_CONN_HANDLE_MAX + 1;
> > > -
> > > -	rcu_read_lock();
> > > -
> > > -	list_for_each_entry_rcu(c, &h->list, list) {
> > > -		/* Find the first unused handle */
> > > -		if (handle == 0xffff || c->handle != handle)
> > > -			break;
> > > -		handle++;
> > > -	}
> > > -	rcu_read_unlock();
> > 
> > The original hci_conn_hash_alloc_unset seems to have wrong logic. It'll
> > e.g. always return HCI_CONN_HANDLE_MAX+1 except if the first conn in
> > conn_hash (which is not in particular order) has that handle...
> > 
> > The code paths adding/deleting conns or entering here / setting handles
> > should be holding hdev->lock, so probably no concurrency issue in it.
> > 
> > Is syzbot happy with just this handle allocation fixed?
> 
> Just fix handle, hci_conn occur UAF in hci_proto_disconn_ind() within hci_conn_timeout() process.
> 
> > 
> > > -
> > > -	return handle;
> > > +	return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
> > > +			       U16_MAX, GFP_ATOMIC);
> > >  }
> > >  
> > > 
> > > 
> > > 
> >
Luiz Augusto von Dentz Oct. 10, 2023, 6:51 p.m. UTC | #7
Hi Pauli,

On Tue, Oct 10, 2023 at 11:32 AM Pauli Virtanen <pav@iki.fi> wrote:
>
> Hi,
>
> ti, 2023-10-10 kello 20:49 +0800, Ziyang Xuan (William)
> kirjoitti:[clip]
> > > >
> > > > - if (atomic_dec_and_test(&conn->refcnt)) {
> > > > + if (atomic_dec_and_test(&conn->refcnt) &&
> > > > +     !test_bit(HCI_CONN_DISC, &conn->flags)) {
> > > >           unsigned long timeo;
> > >
> > > hci_abort_conn already checks if conn->abort_reason was already set, to
> > > avoid queuing abort for the same conn again. Does this flag not try to
> > > do the same thing?
> >
> > That is not enough. hci_conn occur UAF in hci_proto_disconn_ind() before enter
> > hci_abort_conn().
>
> Thanks, this was not clear to me from the patch.
>
> So the problem is that the cancel_delayed_work_sync(&conn->disc_work)
> in hci_conn_del doesn't help in a few cases:
>
> 1. [task A] hci_conn_del(conn) entered
> 2. [A] cancel_delayed_work_sync(conn->disc_work)
> 3. [B] concurrent hci_conn_drop(conn) queues disc_work again
> 4. [A] hci_conn_del finishes
> 5. UAF when disc_work runs
>
> or without needing concurrency
>
> 1. hci_conn_del(conn) entered and finishes
> 2. hci_conn_drop(conn); hci_conn_put(conn); as done in several places
>
> ?

Either way Im not sure what the IDA logic has to with it, that said I
think using ida function is actually a much better solution then the
lookup one so I would be happy to apply it if someone split the
changes related to it and send a patch.

> The hci_conn_del here is not necessarily from hci_abort_conn. Should
> the atomic flag be set in hci_conn_del before
> cancel_delayed_work_sync(&conn->disc_work) to catch possible other
> cases?
>
> > > There are potential races in hci_sync vs. handle reuse since the queued
> > > abort is not canceled if the conn is deleted by something else. But now
> > > I'm not sure syzbot hits this race.
> >
> > Sorry, can you give a specific scenario. I can't understand exactly what you mean.
>
> As noted in the other patch:
>
> 1. hci_conn_abort(conn)
>
> 2. hci_cmd_sync_queue(hdev,abort_conn_sync,UINT_PTR(conn->handle))
>
> 3. something else (e.g. HCI event) deletes conn
>
> 4. something else (e.g. HCI event) creates conn2, with same handle
>    value
>
> 5. Queued abort_conn_sync executes. It needs to be delayed enough,
>    but doesn't need to be concurrent.
>
> 6. abort_conn_sync uses queued handle value, finds conn2 (not conn)
>
> 7. hci_abort_conn_sync(conn2, conn2->abort_reason)
>
> 8. Calling hci_abort_conn_sync with reason == 0 causes UAF
>
> The UAF is because reason==0 is passed to l2cap_disconn_ind which does
> not signal disconnection to the L2CAP layer, hci_abort_conn_sync
> deletes conn immediately after that, and later on L2CAP tries to access
> stale conn pointer.

Are you looking into implementing the cancel logic for abort? Long ago
Ive send a patch to introduce the queue_one logic which does include a
function to lookup into the cmd_sync queue, perhaps we can reuse that
to implement the cancel logic.

> > > >
> > > >           switch (conn->type) {
> > > > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> > > > index 974631e652c1..f87281b4386f 100644
> > > > --- a/net/bluetooth/hci_conn.c
> > > > +++ b/net/bluetooth/hci_conn.c
> > > > @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
> > > >
> > > >
> > > >
> > > >
> > > >   hci_conn_hash_del(hdev, conn);
> > > >
> > > >
> > > >
> > > >
> > > > + if (HCI_CONN_HANDLE_UNSET(conn->handle))
> > > > +         ida_free(&hdev->unset_handle_ida, conn->handle);
> > > > +
> > > >   if (conn->cleanup)
> > > >           conn->cleanup(conn);
> > > >
> > > >
> > > >
> > > >
> > > > @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
> > > >   hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
> > > >  }
> > > >
> > > >
> > > >
> > > >
> > > > -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > > > +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > > >  {
> > > > - struct hci_conn_hash *h = &hdev->conn_hash;
> > > > - struct hci_conn  *c;
> > > > - u16 handle = HCI_CONN_HANDLE_MAX + 1;
> > > > -
> > > > - rcu_read_lock();
> > > > -
> > > > - list_for_each_entry_rcu(c, &h->list, list) {
> > > > -         /* Find the first unused handle */
> > > > -         if (handle == 0xffff || c->handle != handle)
> > > > -                 break;
> > > > -         handle++;
> > > > - }
> > > > - rcu_read_unlock();
> > >
> > > The original hci_conn_hash_alloc_unset seems to have wrong logic. It'll
> > > e.g. always return HCI_CONN_HANDLE_MAX+1 except if the first conn in
> > > conn_hash (which is not in particular order) has that handle...
> > >
> > > The code paths adding/deleting conns or entering here / setting handles
> > > should be holding hdev->lock, so probably no concurrency issue in it.
> > >
> > > Is syzbot happy with just this handle allocation fixed?
> >
> > Just fix handle, hci_conn occur UAF in hci_proto_disconn_ind() within hci_conn_timeout() process.
> >
> > >
> > > > -
> > > > - return handle;
> > > > + return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
> > > > +                        U16_MAX, GFP_ATOMIC);
> > > >  }
> > > >
> > > >
> > > >
> > > >
> > >
>
> --
> Pauli Virtanen
Ziyang Xuan (William) Oct. 11, 2023, 9:31 a.m. UTC | #8
> Hi Pauli,
> 
> On Tue, Oct 10, 2023 at 11:32 AM Pauli Virtanen <pav@iki.fi> wrote:
>>
>> Hi,
>>
>> ti, 2023-10-10 kello 20:49 +0800, Ziyang Xuan (William)
>> kirjoitti:[clip]
>>>>>
>>>>> - if (atomic_dec_and_test(&conn->refcnt)) {
>>>>> + if (atomic_dec_and_test(&conn->refcnt) &&
>>>>> +     !test_bit(HCI_CONN_DISC, &conn->flags)) {
>>>>>           unsigned long timeo;
>>>>
>>>> hci_abort_conn already checks if conn->abort_reason was already set, to
>>>> avoid queuing abort for the same conn again. Does this flag not try to
>>>> do the same thing?
>>>
>>> That is not enough. hci_conn occur UAF in hci_proto_disconn_ind() before enter
>>> hci_abort_conn().
>>
>> Thanks, this was not clear to me from the patch.
>>
>> So the problem is that the cancel_delayed_work_sync(&conn->disc_work)
>> in hci_conn_del doesn't help in a few cases:
>>
>> 1. [task A] hci_conn_del(conn) entered
>> 2. [A] cancel_delayed_work_sync(conn->disc_work)
>> 3. [B] concurrent hci_conn_drop(conn) queues disc_work again
>> 4. [A] hci_conn_del finishes
>> 5. UAF when disc_work runs
>>
>> or without needing concurrency
>>
>> 1. hci_conn_del(conn) entered and finishes
>> 2. hci_conn_drop(conn); hci_conn_put(conn); as done in several places
>>
>> ?
> 
> Either way Im not sure what the IDA logic has to with it, that said I
> think using ida function is actually a much better solution then the
> lookup one so I would be happy to apply it if someone split the
> changes related to it and send a patch.

I will send a patch just with handle ida modification. The remaining UAF issues
continue to be tracked.

> 
>> The hci_conn_del here is not necessarily from hci_abort_conn. Should
>> the atomic flag be set in hci_conn_del before
>> cancel_delayed_work_sync(&conn->disc_work) to catch possible other
>> cases?
>>
Pauli Virtanen Oct. 11, 2023, 7:33 p.m. UTC | #9
Hi Luiz,

ti, 2023-10-10 kello 11:51 -0700, Luiz Augusto von Dentz kirjoitti:
> Hi Pauli,
> 
> On Tue, Oct 10, 2023 at 11:32 AM Pauli Virtanen <pav@iki.fi> wrote:
> > 
> > Hi,
> > 
> > ti, 2023-10-10 kello 20:49 +0800, Ziyang Xuan (William)
> > kirjoitti:[clip]
> > > > > 
> > > > > - if (atomic_dec_and_test(&conn->refcnt)) {
> > > > > + if (atomic_dec_and_test(&conn->refcnt) &&
> > > > > +     !test_bit(HCI_CONN_DISC, &conn->flags)) {
> > > > >           unsigned long timeo;
> > > > 
> > > > hci_abort_conn already checks if conn->abort_reason was already set, to
> > > > avoid queuing abort for the same conn again. Does this flag not try to
> > > > do the same thing?
> > > 
> > > That is not enough. hci_conn occur UAF in hci_proto_disconn_ind() before enter
> > > hci_abort_conn().
> > 
> > Thanks, this was not clear to me from the patch.
> > 
> > So the problem is that the cancel_delayed_work_sync(&conn->disc_work)
> > in hci_conn_del doesn't help in a few cases:
> > 
> > 1. [task A] hci_conn_del(conn) entered
> > 2. [A] cancel_delayed_work_sync(conn->disc_work)
> > 3. [B] concurrent hci_conn_drop(conn) queues disc_work again
> > 4. [A] hci_conn_del finishes
> > 5. UAF when disc_work runs
> > 
> > or without needing concurrency
> > 
> > 1. hci_conn_del(conn) entered and finishes
> > 2. hci_conn_drop(conn); hci_conn_put(conn); as done in several places
> > 
> > ?
> 
> Either way Im not sure what the IDA logic has to with it, that said I
> think using ida function is actually a much better solution then the
> lookup one so I would be happy to apply it if someone split the
> changes related to it and send a patch.
> 
> > The hci_conn_del here is not necessarily from hci_abort_conn. Should
> > the atomic flag be set in hci_conn_del before
> > cancel_delayed_work_sync(&conn->disc_work) to catch possible other
> > cases?
> > 
> > > > There are potential races in hci_sync vs. handle reuse since the queued
> > > > abort is not canceled if the conn is deleted by something else. But now
> > > > I'm not sure syzbot hits this race.
> > > 
> > > Sorry, can you give a specific scenario. I can't understand exactly what you mean.
> > 
> > As noted in the other patch:
> > 
> > 1. hci_conn_abort(conn)
> > 
> > 2. hci_cmd_sync_queue(hdev,abort_conn_sync,UINT_PTR(conn->handle))
> > 
> > 3. something else (e.g. HCI event) deletes conn
> > 
> > 4. something else (e.g. HCI event) creates conn2, with same handle
> >    value
> > 
> > 5. Queued abort_conn_sync executes. It needs to be delayed enough,
> >    but doesn't need to be concurrent.
> > 
> > 6. abort_conn_sync uses queued handle value, finds conn2 (not conn)
> > 
> > 7. hci_abort_conn_sync(conn2, conn2->abort_reason)
> > 
> > 8. Calling hci_abort_conn_sync with reason == 0 causes UAF
> > 
> > The UAF is because reason==0 is passed to l2cap_disconn_ind which does
> > not signal disconnection to the L2CAP layer, hci_abort_conn_sync
> > deletes conn immediately after that, and later on L2CAP tries to access
> > stale conn pointer.
> 
> Are you looking into implementing the cancel logic for abort? Long ago
> Ive send a patch to introduce the queue_one logic which does include a
> function to lookup into the cmd_sync queue, perhaps we can reuse that
> to implement the cancel logic.

I have a patchset for this, but needs still some more testing.

> > > > > 
> > > > >           switch (conn->type) {
> > > > > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
> > > > > index 974631e652c1..f87281b4386f 100644
> > > > > --- a/net/bluetooth/hci_conn.c
> > > > > +++ b/net/bluetooth/hci_conn.c
> > > > > @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > >   hci_conn_hash_del(hdev, conn);
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > + if (HCI_CONN_HANDLE_UNSET(conn->handle))
> > > > > +         ida_free(&hdev->unset_handle_ida, conn->handle);
> > > > > +
> > > > >   if (conn->cleanup)
> > > > >           conn->cleanup(conn);
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > @@ -929,29 +932,17 @@ static void cis_cleanup(struct hci_conn *conn)
> > > > >   hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
> > > > >  }
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > > -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > > > > +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
> > > > >  {
> > > > > - struct hci_conn_hash *h = &hdev->conn_hash;
> > > > > - struct hci_conn  *c;
> > > > > - u16 handle = HCI_CONN_HANDLE_MAX + 1;
> > > > > -
> > > > > - rcu_read_lock();
> > > > > -
> > > > > - list_for_each_entry_rcu(c, &h->list, list) {
> > > > > -         /* Find the first unused handle */
> > > > > -         if (handle == 0xffff || c->handle != handle)
> > > > > -                 break;
> > > > > -         handle++;
> > > > > - }
> > > > > - rcu_read_unlock();
> > > > 
> > > > The original hci_conn_hash_alloc_unset seems to have wrong logic. It'll
> > > > e.g. always return HCI_CONN_HANDLE_MAX+1 except if the first conn in
> > > > conn_hash (which is not in particular order) has that handle...
> > > > 
> > > > The code paths adding/deleting conns or entering here / setting handles
> > > > should be holding hdev->lock, so probably no concurrency issue in it.
> > > > 
> > > > Is syzbot happy with just this handle allocation fixed?
> > > 
> > > Just fix handle, hci_conn occur UAF in hci_proto_disconn_ind() within hci_conn_timeout() process.
> > > 
> > > > 
> > > > > -
> > > > > - return handle;
> > > > > + return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
> > > > > +                        U16_MAX, GFP_ATOMIC);
> > > > >  }
> > > > > 
> > > > > 
> > > > > 
> > > > > 
> > > > 
> > 
> > --
> > Pauli Virtanen
> 
> 
>
diff mbox series

Patch

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f36c1fd5d64e..4f44f2bffa57 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -350,6 +350,8 @@  struct hci_dev {
 	struct list_head list;
 	struct mutex	lock;
 
+	struct ida	unset_handle_ida;
+
 	const char	*name;
 	unsigned long	flags;
 	__u16		id;
@@ -969,6 +971,7 @@  enum {
 	HCI_CONN_AUTH_INITIATOR,
 	HCI_CONN_DROP,
 	HCI_CONN_CANCEL,
+	HCI_CONN_DISC,
 	HCI_CONN_PARAM_REMOVAL_PEND,
 	HCI_CONN_NEW_LINK_KEY,
 	HCI_CONN_SCANNING,
@@ -1543,7 +1546,8 @@  static inline void hci_conn_drop(struct hci_conn *conn)
 {
 	BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
 
-	if (atomic_dec_and_test(&conn->refcnt)) {
+	if (atomic_dec_and_test(&conn->refcnt) &&
+	    !test_bit(HCI_CONN_DISC, &conn->flags)) {
 		unsigned long timeo;
 
 		switch (conn->type) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 974631e652c1..f87281b4386f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -153,6 +153,9 @@  static void hci_conn_cleanup(struct hci_conn *conn)
 
 	hci_conn_hash_del(hdev, conn);
 
+	if (HCI_CONN_HANDLE_UNSET(conn->handle))
+		ida_free(&hdev->unset_handle_ida, conn->handle);
+
 	if (conn->cleanup)
 		conn->cleanup(conn);
 
@@ -929,29 +932,17 @@  static void cis_cleanup(struct hci_conn *conn)
 	hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
 }
 
-static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
+static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
 {
-	struct hci_conn_hash *h = &hdev->conn_hash;
-	struct hci_conn  *c;
-	u16 handle = HCI_CONN_HANDLE_MAX + 1;
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(c, &h->list, list) {
-		/* Find the first unused handle */
-		if (handle == 0xffff || c->handle != handle)
-			break;
-		handle++;
-	}
-	rcu_read_unlock();
-
-	return handle;
+	return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
+			       U16_MAX, GFP_ATOMIC);
 }
 
 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 			      u8 role)
 {
 	struct hci_conn *conn;
+	int handle;
 
 	BT_DBG("%s dst %pMR", hdev->name, dst);
 
@@ -961,7 +952,12 @@  struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 
 	bacpy(&conn->dst, dst);
 	bacpy(&conn->src, &hdev->bdaddr);
-	conn->handle = hci_conn_hash_alloc_unset(hdev);
+	handle = hci_conn_hash_alloc_unset(hdev);
+	if (unlikely(handle < 0)) {
+		kfree(conn);
+		return NULL;
+	}
+	conn->handle = handle;
 	conn->hdev  = hdev;
 	conn->type  = type;
 	conn->role  = role;
@@ -2933,6 +2929,7 @@  static int abort_conn_sync(struct hci_dev *hdev, void *data)
 int hci_abort_conn(struct hci_conn *conn, u8 reason)
 {
 	struct hci_dev *hdev = conn->hdev;
+	int ret;
 
 	/* If abort_reason has already been set it means the connection is
 	 * already being aborted so don't attempt to overwrite it.
@@ -2961,6 +2958,9 @@  int hci_abort_conn(struct hci_conn *conn, u8 reason)
 		}
 	}
 
-	return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
-				  NULL);
+	ret = hci_cmd_sync_queue(hdev, abort_conn_sync,
+				 UINT_PTR(conn->handle), NULL);
+	if (!ret)
+		set_bit(HCI_CONN_DISC, &conn->flags);
+	return ret;
 }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 195aea2198a9..65601aa52e0d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2535,6 +2535,8 @@  struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);
 
+	ida_init(&hdev->unset_handle_ida);
+
 	INIT_LIST_HEAD(&hdev->mesh_pending);
 	INIT_LIST_HEAD(&hdev->mgmt_pending);
 	INIT_LIST_HEAD(&hdev->reject_list);
@@ -2789,6 +2791,7 @@  void hci_release_dev(struct hci_dev *hdev)
 	hci_codec_list_clear(&hdev->local_codecs);
 	hci_dev_unlock(hdev);
 
+	ida_destroy(&hdev->unset_handle_ida);
 	ida_simple_remove(&hci_index_ida, hdev->id);
 	kfree_skb(hdev->sent_cmd);
 	kfree_skb(hdev->recv_event);