diff mbox series

[bug,report] GATT server crash after client disconnect (UAF)

Message ID CAH4xE45xkoHOY6kgYLf=HVVrMPrgqDZzRfEwGi3Ozi1MW4e+dg@mail.gmail.com (mailing list archive)
State New
Headers show
Series [bug,report] GATT server crash after client disconnect (UAF) | expand

Checks

Context Check Description
tedd_an/pre-ci_am fail error: corrupt patch at line 22 hint: Use 'git am --show-current-patch' to see the failed patch

Commit Message

Kévin Courdesses June 26, 2024, 10:53 a.m. UTC
Hello,

We detected a spurious crash of `bluetoothd` in our embedded system,
acting as a GATT server. The system is running the latest BlueZ
release (5.76).

I traced the problem to a use-after-free (UAF) error, sometimes
triggered when a device disconnects while a characteristic read or
write is still being processed. Below are relevant debug and Valgrind
traces.

----
bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req -
handle: 0x0014
bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read
Complete: err 0
bluetoothd[167]: src/shared/att.c:can_read_data() (chan 0x53bd198) ATT
PDU received: 0x0a
bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req -
handle: 0x002e
bluetoothd[167]: src/shared/mgmt.c:can_read_data() [0x0000] event 0x000c
bluetoothd[167]: src/adapter.c:dev_disconnected() Device
48:A4:72:79:82:D5 disconnected, reason 3
bluetoothd[167]: src/adapter.c:adapter_remove_connection()
bluetoothd[167]: plugins/policy.c:disconnect_cb() reason 3
bluetoothd[167]: src/adapter.c:bonding_attempt_complete() hci0 bdaddr
48:A4:72:79:82:D5 type 1 status 0xe
bluetoothd[167]: src/device.c:device_bonding_complete() bonding (nil)
status 0x0e
bluetoothd[167]: src/device.c:btd_device_set_temporary() temporary 1
bluetoothd[167]: src/device.c:device_bonding_failed() status 14
bluetoothd[167]: src/adapter.c:resume_discovery()
bluetoothd[167]: src/shared/att.c:disconnect_cb() Channel 0x53bd198
disconnected: Connection reset by peer
bluetoothd[167]: src/device.c:att_disconnected_cb()
bluetoothd[167]: src/device.c:att_disconnected_cb() Connection reset
by peer (104)
bluetoothd[167]: src/service.c:change_state() 0x53faa80: device
48:A4:72:79:82:D5 profile gap-profile state changed: connected ->
disconnecting (0)
bluetoothd[167]: src/service.c:change_state() 0x53faa80: device
48:A4:72:79:82:D5 profile gap-profile state changed: disconnecting ->
disconnected (0)
bluetoothd[167]: src/service.c:change_state() 0x54017a8: device
48:A4:72:79:82:D5 profile deviceinfo state changed: connected ->
disconnecting (0)
bluetoothd[167]: src/service.c:change_state() 0x54017a8: device
48:A4:72:79:82:D5 profile deviceinfo state changed: disconnecting ->
disconnected (0)
bluetoothd[167]: src/gatt-client.c:btd_gatt_client_disconnected()
Device disconnected. Cleaning up.
bluetoothd[167]: src/device.c:att_disconnected_cb() Automatic
connection disabled
bluetoothd[167]: src/gatt-database.c:btd_gatt_database_att_disconnected()
bluetoothd[167]: src/gatt-database.c:att_disconnected()
bluetoothd[167]: attrib/gattrib.c:g_attrib_unref() 0x53bd140: g_attrib_unref=0
bluetoothd[167]: src/gatt-database.c:read_reply_cb() Pending read was
canceled when object got removed
bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read
Complete: err -110
==167== Invalid read of size 4
==167==    at 0x8277C: bt_att_chan_send_error_rsp (in
/usr/libexec/bluetooth/bluetoothd)
==167==  Address 0x53bd198 is 0 bytes inside a block of size 44 free'd
==167==    at 0x482F350: free (vg_replace_malloc.c:538)
==167==    by 0x8320F: disconnect_cb (in /usr/libexec/bluetooth/bluetoothd)
==167==  Block was alloc'd at
==167==    at 0x482DE08: malloc (vg_replace_malloc.c:307)
==167==    by 0x7BDD3: util_malloc (in /usr/libexec/bluetooth/bluetoothd)
==167==
[cropped, valgrind complains a lot after this]
----

These logs illustrate that the problem occurs soon after
`read_complete_cb` is executed, in `bt_att_chan_send_error_rsp`.

src/shared/gatt-server.c
     904 static void read_complete_cb(struct gatt_db_attribute *attr, int err,
     905                     const uint8_t *value, size_t len,
     906                     void *user_data)
     907 {
     908     struct async_read_op *op = user_data;
     909     struct bt_gatt_server *server = op->server;
     910     uint8_t rsp_opcode;
     911     uint16_t mtu;
     912     uint16_t handle;
     913
     914     DBG(server, "Read Complete: err %d", err);
     915
     916     mtu = bt_att_get_mtu(server->att);
     917     handle = gatt_db_attribute_get_handle(attr);
     918
     919     if (err) {
     920         bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
     921         async_read_op_destroy(op);
     922         return;
     923     }

However, at this point, `op->chan` has already been freed by a `disconnect_cb`.

I'm not familiar with the BlueZ code base and I don't know the
idiomatic way to fix this issue. I'd say `bt_att_chan_send_error_rsp`
should only be called if `op->chan` is still allocated. The following
patch attempts to implement a solution. It's probably not the right
way to do it, but this seemingly fixes the issue on our side.

-----
        else
                bt_att_chan_send_rsp(op->chan, BT_ATT_OP_WRITE_RSP, NULL, 0);

@@ -917,7 +931,19 @@ static void read_complete_cb(struct
gatt_db_attribute *attr, int err,
        handle = gatt_db_attribute_get_handle(attr);

        if (err) {
-               bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
+               // Only call bt_att_chan_send_error_rsp if the channel
+               // is still valid
+               struct bt_att *att = bt_gatt_server_get_att(server);
+               struct queue *chans = bt_att_get_chans(att);
+               const struct queue_entry *entry;
+               for (entry = queue_get_entries(chans); entry; entry =
entry->next)
+               {
+                       if (entry->data == op->chan)
+                       {
+                               bt_att_chan_send_error_rsp(op->chan,
op->opcode, handle, err);
+                               break;
+                       }
+               }
                async_read_op_destroy(op);
                return;
        }
-----

Regards,

Kévin

Comments

bluez.test.bot@gmail.com June 26, 2024, 11:12 a.m. UTC | #1
This is an automated email and please do not reply to this email.

Dear Submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
While preparing the CI tests, the patches you submitted couldn't be applied to the current HEAD of the repository.

----- Output -----

error: corrupt patch at line 22
hint: Use 'git am --show-current-patch' to see the failed patch

Please resolve the issue and submit the patches again.


---
Regards,
Linux Bluetooth
Luiz Augusto von Dentz June 26, 2024, 2:03 p.m. UTC | #2
Hi Kevin,

On Wed, Jun 26, 2024 at 6:58 AM Kévin Courdesses
<kevin.courdesses@beacon.bio> wrote:
>
> Hello,
>
> We detected a spurious crash of `bluetoothd` in our embedded system,
> acting as a GATT server. The system is running the latest BlueZ
> release (5.76).
>
> I traced the problem to a use-after-free (UAF) error, sometimes
> triggered when a device disconnects while a characteristic read or
> write is still being processed. Below are relevant debug and Valgrind
> traces.
>
> ----
> bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req -
> handle: 0x0014
> bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read
> Complete: err 0
> bluetoothd[167]: src/shared/att.c:can_read_data() (chan 0x53bd198) ATT
> PDU received: 0x0a
> bluetoothd[167]: src/shared/gatt-server.c:handle_read_req() Read Req -
> handle: 0x002e
> bluetoothd[167]: src/shared/mgmt.c:can_read_data() [0x0000] event 0x000c
> bluetoothd[167]: src/adapter.c:dev_disconnected() Device
> 48:A4:72:79:82:D5 disconnected, reason 3
> bluetoothd[167]: src/adapter.c:adapter_remove_connection()
> bluetoothd[167]: plugins/policy.c:disconnect_cb() reason 3
> bluetoothd[167]: src/adapter.c:bonding_attempt_complete() hci0 bdaddr
> 48:A4:72:79:82:D5 type 1 status 0xe
> bluetoothd[167]: src/device.c:device_bonding_complete() bonding (nil)
> status 0x0e
> bluetoothd[167]: src/device.c:btd_device_set_temporary() temporary 1
> bluetoothd[167]: src/device.c:device_bonding_failed() status 14
> bluetoothd[167]: src/adapter.c:resume_discovery()
> bluetoothd[167]: src/shared/att.c:disconnect_cb() Channel 0x53bd198
> disconnected: Connection reset by peer
> bluetoothd[167]: src/device.c:att_disconnected_cb()
> bluetoothd[167]: src/device.c:att_disconnected_cb() Connection reset
> by peer (104)
> bluetoothd[167]: src/service.c:change_state() 0x53faa80: device
> 48:A4:72:79:82:D5 profile gap-profile state changed: connected ->
> disconnecting (0)
> bluetoothd[167]: src/service.c:change_state() 0x53faa80: device
> 48:A4:72:79:82:D5 profile gap-profile state changed: disconnecting ->
> disconnected (0)
> bluetoothd[167]: src/service.c:change_state() 0x54017a8: device
> 48:A4:72:79:82:D5 profile deviceinfo state changed: connected ->
> disconnecting (0)
> bluetoothd[167]: src/service.c:change_state() 0x54017a8: device
> 48:A4:72:79:82:D5 profile deviceinfo state changed: disconnecting ->
> disconnected (0)
> bluetoothd[167]: src/gatt-client.c:btd_gatt_client_disconnected()
> Device disconnected. Cleaning up.
> bluetoothd[167]: src/device.c:att_disconnected_cb() Automatic
> connection disabled
> bluetoothd[167]: src/gatt-database.c:btd_gatt_database_att_disconnected()
> bluetoothd[167]: src/gatt-database.c:att_disconnected()
> bluetoothd[167]: attrib/gattrib.c:g_attrib_unref() 0x53bd140: g_attrib_unref=0
> bluetoothd[167]: src/gatt-database.c:read_reply_cb() Pending read was
> canceled when object got removed
> bluetoothd[167]: src/shared/gatt-server.c:read_complete_cb() Read
> Complete: err -110
> ==167== Invalid read of size 4
> ==167==    at 0x8277C: bt_att_chan_send_error_rsp (in
> /usr/libexec/bluetooth/bluetoothd)
> ==167==  Address 0x53bd198 is 0 bytes inside a block of size 44 free'd
> ==167==    at 0x482F350: free (vg_replace_malloc.c:538)
> ==167==    by 0x8320F: disconnect_cb (in /usr/libexec/bluetooth/bluetoothd)
> ==167==  Block was alloc'd at
> ==167==    at 0x482DE08: malloc (vg_replace_malloc.c:307)
> ==167==    by 0x7BDD3: util_malloc (in /usr/libexec/bluetooth/bluetoothd)
> ==167==
> [cropped, valgrind complains a lot after this]
> ----
>
> These logs illustrate that the problem occurs soon after
> `read_complete_cb` is executed, in `bt_att_chan_send_error_rsp`.
>
> src/shared/gatt-server.c
>      904 static void read_complete_cb(struct gatt_db_attribute *attr, int err,
>      905                     const uint8_t *value, size_t len,
>      906                     void *user_data)
>      907 {
>      908     struct async_read_op *op = user_data;
>      909     struct bt_gatt_server *server = op->server;
>      910     uint8_t rsp_opcode;
>      911     uint16_t mtu;
>      912     uint16_t handle;
>      913
>      914     DBG(server, "Read Complete: err %d", err);
>      915
>      916     mtu = bt_att_get_mtu(server->att);
>      917     handle = gatt_db_attribute_get_handle(attr);
>      918
>      919     if (err) {
>      920         bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
>      921         async_read_op_destroy(op);
>      922         return;
>      923     }
>
> However, at this point, `op->chan` has already been freed by a `disconnect_cb`.
>
> I'm not familiar with the BlueZ code base and I don't know the
> idiomatic way to fix this issue. I'd say `bt_att_chan_send_error_rsp`
> should only be called if `op->chan` is still allocated. The following
> patch attempts to implement a solution. It's probably not the right
> way to do it, but this seemingly fixes the issue on our side.

If we get to this point perhaps the op is being cleaned up so we might
need to set the op->chan to NULL or properly reference it at op
creation so it is not freed until the op is freed.

> -----
> diff --git a/src/shared/att.c b/src/shared/att.c
> index 485ef07..baef0cc 100644
> --- a/src/shared/att.c
> +++ b/src/shared/att.c
> @@ -2068,3 +2068,11 @@ done:
>
>         return true;
>  }
> +
> +struct queue *bt_att_get_chans(struct bt_att *att)
> +{
> +       if (!att)
> +               return NULL;
> +
> +       return att->chans;
> +}
> \ No newline at end of file
> diff --git a/src/shared/att.h b/src/shared/att.h
> index 6fd7863..98f91e1 100644
> --- a/src/shared/att.h
> +++ b/src/shared/att.h
> @@ -111,3 +111,5 @@ bool bt_att_set_remote_key(struct bt_att *att,
> uint8_t sign_key[16],
>                         bt_att_counter_func_t func, void *user_data);
>  bool bt_att_has_crypto(struct bt_att *att);
>  bool bt_att_set_retry(struct bt_att *att, unsigned int id, bool retry);
> +
> +struct queue *bt_att_get_chans(struct bt_att *att);
> \ No newline at end of file
> diff --git a/src/shared/gatt-server.c b/src/shared/gatt-server.c
> index 0e399ce..ca9af59 100644
> --- a/src/shared/gatt-server.c
> +++ b/src/shared/gatt-server.c
> @@ -790,7 +790,21 @@ static void write_complete_cb(struct
> gatt_db_attribute *attr, int err,
>         handle = gatt_db_attribute_get_handle(attr);
>
>         if (err)
> -               bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
> +       {
> +               // Only call bt_att_chan_send_error_rsp if the channel
> +               // is still valid
> +               struct bt_att *att = bt_gatt_server_get_att(server);
> +               struct queue *chans = bt_att_get_chans(att);
> +               const struct queue_entry *entry;
> +               for (entry = queue_get_entries(chans); entry; entry =
> entry->next)
> +               {
> +                       if (entry->data == op->chan)
> +                       {
> +                               bt_att_chan_send_error_rsp(op->chan,
> op->opcode, handle, err);
> +                               break;
> +                       }
> +               }
> +       }
>         else
>                 bt_att_chan_send_rsp(op->chan, BT_ATT_OP_WRITE_RSP, NULL, 0);
>
> @@ -917,7 +931,19 @@ static void read_complete_cb(struct
> gatt_db_attribute *attr, int err,
>         handle = gatt_db_attribute_get_handle(attr);
>
>         if (err) {
> -               bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
> +               // Only call bt_att_chan_send_error_rsp if the channel
> +               // is still valid
> +               struct bt_att *att = bt_gatt_server_get_att(server);
> +               struct queue *chans = bt_att_get_chans(att);
> +               const struct queue_entry *entry;
> +               for (entry = queue_get_entries(chans); entry; entry =
> entry->next)
> +               {
> +                       if (entry->data == op->chan)
> +                       {
> +                               bt_att_chan_send_error_rsp(op->chan,
> op->opcode, handle, err);
> +                               break;
> +                       }
> +               }
>                 async_read_op_destroy(op);
>                 return;
>         }
> -----
>
> Regards,
>
> Kévin
>
diff mbox series

Patch

diff --git a/src/shared/att.c b/src/shared/att.c
index 485ef07..baef0cc 100644
--- a/src/shared/att.c
+++ b/src/shared/att.c
@@ -2068,3 +2068,11 @@  done:

        return true;
 }
+
+struct queue *bt_att_get_chans(struct bt_att *att)
+{
+       if (!att)
+               return NULL;
+
+       return att->chans;
+}
\ No newline at end of file
diff --git a/src/shared/att.h b/src/shared/att.h
index 6fd7863..98f91e1 100644
--- a/src/shared/att.h
+++ b/src/shared/att.h
@@ -111,3 +111,5 @@  bool bt_att_set_remote_key(struct bt_att *att,
uint8_t sign_key[16],
                        bt_att_counter_func_t func, void *user_data);
 bool bt_att_has_crypto(struct bt_att *att);
 bool bt_att_set_retry(struct bt_att *att, unsigned int id, bool retry);
+
+struct queue *bt_att_get_chans(struct bt_att *att);
\ No newline at end of file
diff --git a/src/shared/gatt-server.c b/src/shared/gatt-server.c
index 0e399ce..ca9af59 100644
--- a/src/shared/gatt-server.c
+++ b/src/shared/gatt-server.c
@@ -790,7 +790,21 @@  static void write_complete_cb(struct
gatt_db_attribute *attr, int err,
        handle = gatt_db_attribute_get_handle(attr);

        if (err)
-               bt_att_chan_send_error_rsp(op->chan, op->opcode, handle, err);
+       {
+               // Only call bt_att_chan_send_error_rsp if the channel
+               // is still valid
+               struct bt_att *att = bt_gatt_server_get_att(server);
+               struct queue *chans = bt_att_get_chans(att);
+               const struct queue_entry *entry;
+               for (entry = queue_get_entries(chans); entry; entry =
entry->next)
+               {
+                       if (entry->data == op->chan)
+                       {
+                               bt_att_chan_send_error_rsp(op->chan,
op->opcode, handle, err);
+                               break;
+                       }
+               }
+       }