diff mbox series

[v3,1/2] Bluetooth: fix inconsistent lock state in SCO

Message ID 20210721093832.78081-2-desmondcheongzx@gmail.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Bluetooth: fix inconsistent lock states | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 7 this patch: 7
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 111 lines checked
netdev/build_allmodconfig_warn fail Errors and warnings before: 7 this patch: 7
netdev/header_inline success Link

Commit Message

Desmond Cheong Zhi Xi July 21, 2021, 9:38 a.m. UTC
Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
usage in sco_conn_del and sco_sock_timeout that could lead to
deadlocks.

This inconsistent lock state can also happen in sco_conn_ready,
rfcomm_connect_ind, and bt_accept_enqueue.

The issue is that these functions take a spin lock on the socket with
interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
context. This could lead to deadlocks:

       CPU0
       ----
  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
  <Interrupt>
    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

 *** DEADLOCK ***

We fix this by ensuring that local bh is disabled before calling
bh_lock_sock.

After doing this, we additionally need to protect sco_conn_lock by
disabling local bh.

This is necessary because sco_conn_del makes a call to sco_chan_del
while holding on to the sock lock, and sco_chan_del itself makes a
call to sco_conn_lock. If sco_conn_lock is held elsewhere with
interrupts enabled, there could still be a
slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
follows:

        CPU0                    CPU1
        ----                    ----
   lock(&conn->lock#2);
                                local_irq_disable();
                                lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
                                lock(&conn->lock#2);
   <Interrupt>
     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

  *** DEADLOCK ***

Although sco_conn_del disables local bh before calling sco_chan_del,
we can still wrap the calls to sco_conn_lock in sco_chan_del, with
local_bh_disable/enable as this pair of functions are reentrant.

Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
---
 net/bluetooth/sco.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

Comments

Luiz Augusto von Dentz July 27, 2021, 12:30 a.m. UTC | #1
Hi Desmond,

On Wed, Jul 21, 2021 at 2:39 AM Desmond Cheong Zhi Xi
<desmondcheongzx@gmail.com> wrote:
>
> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
> usage in sco_conn_del and sco_sock_timeout that could lead to
> deadlocks.
>
> This inconsistent lock state can also happen in sco_conn_ready,
> rfcomm_connect_ind, and bt_accept_enqueue.
>
> The issue is that these functions take a spin lock on the socket with
> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
> context. This could lead to deadlocks:
>
>        CPU0
>        ----
>   lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>   <Interrupt>
>     lock(slock-AF_BLUETOOTH-BTPROTO_SCO);

Having a second look at this, it does seem this is due to use of
sk->sk_timer which apparently run its callback on IRQ context, so I
wonder if wouldn't be a better idea to switch to a delayed_work to
avoid having to deal with the likes of local_bh_disable, in fact it
seems we have been misusing it since the documentation says it is for
sock cleanup not for handling things like SNDTIMEO, we don't really
use it for other socket types so I wonder when we start using
delayed_work we forgot about sco.c.

>  *** DEADLOCK ***
>
> We fix this by ensuring that local bh is disabled before calling
> bh_lock_sock.
>
> After doing this, we additionally need to protect sco_conn_lock by
> disabling local bh.
>
> This is necessary because sco_conn_del makes a call to sco_chan_del
> while holding on to the sock lock, and sco_chan_del itself makes a
> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
> interrupts enabled, there could still be a
> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
> follows:
>
>         CPU0                    CPU1
>         ----                    ----
>    lock(&conn->lock#2);
>                                 local_irq_disable();
>                                 lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>                                 lock(&conn->lock#2);
>    <Interrupt>
>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>
>   *** DEADLOCK ***
>
> Although sco_conn_del disables local bh before calling sco_chan_del,
> we can still wrap the calls to sco_conn_lock in sco_chan_del, with
> local_bh_disable/enable as this pair of functions are reentrant.
>
> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
> ---
>  net/bluetooth/sco.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
>
> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
> index 3bd41563f118..34f3419c3330 100644
> --- a/net/bluetooth/sco.c
> +++ b/net/bluetooth/sco.c
> @@ -140,10 +140,12 @@ static void sco_chan_del(struct sock *sk, int err)
>         BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
>
>         if (conn) {
> +               local_bh_disable();
>                 sco_conn_lock(conn);
>                 conn->sk = NULL;
>                 sco_pi(sk)->conn = NULL;
>                 sco_conn_unlock(conn);
> +               local_bh_enable();
>
>                 if (conn->hcon)
>                         hci_conn_drop(conn->hcon);
> @@ -167,16 +169,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>         BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>
>         /* Kill socket */
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (sk) {
>                 sock_hold(sk);
> +
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sco_sock_clear_timer(sk);
>                 sco_chan_del(sk, err);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
> +
>                 sco_sock_kill(sk);
>                 sock_put(sk);
>         }
> @@ -202,6 +210,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>  {
>         int err = 0;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         if (conn->sk)
>                 err = -EBUSY;
> @@ -209,6 +218,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>                 __sco_chan_add(conn, sk, parent);
>
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>         return err;
>  }
>
> @@ -303,9 +313,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>  {
>         struct sock *sk;
>
> +       local_bh_disable();
>         sco_conn_lock(conn);
>         sk = conn->sk;
>         sco_conn_unlock(conn);
> +       local_bh_enable();
>
>         if (!sk)
>                 goto drop;
> @@ -420,10 +432,12 @@ static void __sco_sock_close(struct sock *sk)
>                 if (sco_pi(sk)->conn->hcon) {
>                         sk->sk_state = BT_DISCONN;
>                         sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
> +                       local_bh_disable();
>                         sco_conn_lock(sco_pi(sk)->conn);
>                         hci_conn_drop(sco_pi(sk)->conn->hcon);
>                         sco_pi(sk)->conn->hcon = NULL;
>                         sco_conn_unlock(sco_pi(sk)->conn);
> +                       local_bh_enable();
>                 } else
>                         sco_chan_del(sk, ECONNRESET);
>                 break;
> @@ -1084,21 +1098,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>
>         if (sk) {
>                 sco_sock_clear_timer(sk);
> +               local_bh_disable();
>                 bh_lock_sock(sk);
>                 sk->sk_state = BT_CONNECTED;
>                 sk->sk_state_change(sk);
>                 bh_unlock_sock(sk);
> +               local_bh_enable();
>         } else {
> +               local_bh_disable();
>                 sco_conn_lock(conn);
>
>                 if (!conn->hcon) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
>                 parent = sco_get_sock_listen(&conn->hcon->src);
>                 if (!parent) {
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1109,6 +1128,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 if (!sk) {
>                         bh_unlock_sock(parent);
>                         sco_conn_unlock(conn);
> +                       local_bh_enable();
>                         return;
>                 }
>
> @@ -1131,6 +1151,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>                 bh_unlock_sock(parent);
>
>                 sco_conn_unlock(conn);
> +               local_bh_enable();
>         }
>  }
>
> --
> 2.25.1
>
Desmond Cheong Zhi Xi July 27, 2021, 5:13 a.m. UTC | #2
On 27/7/21 8:30 am, Luiz Augusto von Dentz wrote:
> Hi Desmond,
> 
> On Wed, Jul 21, 2021 at 2:39 AM Desmond Cheong Zhi Xi
> <desmondcheongzx@gmail.com> wrote:
>>
>> Syzbot reported an inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock
>> usage in sco_conn_del and sco_sock_timeout that could lead to
>> deadlocks.
>>
>> This inconsistent lock state can also happen in sco_conn_ready,
>> rfcomm_connect_ind, and bt_accept_enqueue.
>>
>> The issue is that these functions take a spin lock on the socket with
>> interrupts enabled, but sco_sock_timeout takes the lock in an IRQ
>> context. This could lead to deadlocks:
>>
>>         CPU0
>>         ----
>>    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>    <Interrupt>
>>      lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
> 
> Having a second look at this, it does seem this is due to use of
> sk->sk_timer which apparently run its callback on IRQ context, so I
> wonder if wouldn't be a better idea to switch to a delayed_work to
> avoid having to deal with the likes of local_bh_disable, in fact it
> seems we have been misusing it since the documentation says it is for
> sock cleanup not for handling things like SNDTIMEO, we don't really
> use it for other socket types so I wonder when we start using
> delayed_work we forgot about sco.c.
> 

Hi Luiz,

That makes sense to me. I don't think there's a need for the timeout to 
be run in an IRQ context.

I'll prepare a patch for this, thanks for the suggestion.

>>   *** DEADLOCK ***
>>
>> We fix this by ensuring that local bh is disabled before calling
>> bh_lock_sock.
>>
>> After doing this, we additionally need to protect sco_conn_lock by
>> disabling local bh.
>>
>> This is necessary because sco_conn_del makes a call to sco_chan_del
>> while holding on to the sock lock, and sco_chan_del itself makes a
>> call to sco_conn_lock. If sco_conn_lock is held elsewhere with
>> interrupts enabled, there could still be a
>> slock-AF_BLUETOOTH-BTPROTO_SCO --> &conn->lock#2 lock inversion as
>> follows:
>>
>>          CPU0                    CPU1
>>          ----                    ----
>>     lock(&conn->lock#2);
>>                                  local_irq_disable();
>>                                  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>                                  lock(&conn->lock#2);
>>     <Interrupt>
>>       lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
>>
>>    *** DEADLOCK ***
>>
>> Although sco_conn_del disables local bh before calling sco_chan_del,
>> we can still wrap the calls to sco_conn_lock in sco_chan_del, with
>> local_bh_disable/enable as this pair of functions are reentrant.
>>
>> Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com
>> Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
>> ---
>>   net/bluetooth/sco.c | 21 +++++++++++++++++++++
>>   1 file changed, 21 insertions(+)
>>
>> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
>> index 3bd41563f118..34f3419c3330 100644
>> --- a/net/bluetooth/sco.c
>> +++ b/net/bluetooth/sco.c
>> @@ -140,10 +140,12 @@ static void sco_chan_del(struct sock *sk, int err)
>>          BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
>>
>>          if (conn) {
>> +               local_bh_disable();
>>                  sco_conn_lock(conn);
>>                  conn->sk = NULL;
>>                  sco_pi(sk)->conn = NULL;
>>                  sco_conn_unlock(conn);
>> +               local_bh_enable();
>>
>>                  if (conn->hcon)
>>                          hci_conn_drop(conn->hcon);
>> @@ -167,16 +169,22 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
>>          BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
>>
>>          /* Kill socket */
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (sk) {
>>                  sock_hold(sk);
>> +
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sco_sock_clear_timer(sk);
>>                  sco_chan_del(sk, err);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>> +
>>                  sco_sock_kill(sk);
>>                  sock_put(sk);
>>          }
>> @@ -202,6 +210,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>   {
>>          int err = 0;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          if (conn->sk)
>>                  err = -EBUSY;
>> @@ -209,6 +218,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
>>                  __sco_chan_add(conn, sk, parent);
>>
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>          return err;
>>   }
>>
>> @@ -303,9 +313,11 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
>>   {
>>          struct sock *sk;
>>
>> +       local_bh_disable();
>>          sco_conn_lock(conn);
>>          sk = conn->sk;
>>          sco_conn_unlock(conn);
>> +       local_bh_enable();
>>
>>          if (!sk)
>>                  goto drop;
>> @@ -420,10 +432,12 @@ static void __sco_sock_close(struct sock *sk)
>>                  if (sco_pi(sk)->conn->hcon) {
>>                          sk->sk_state = BT_DISCONN;
>>                          sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
>> +                       local_bh_disable();
>>                          sco_conn_lock(sco_pi(sk)->conn);
>>                          hci_conn_drop(sco_pi(sk)->conn->hcon);
>>                          sco_pi(sk)->conn->hcon = NULL;
>>                          sco_conn_unlock(sco_pi(sk)->conn);
>> +                       local_bh_enable();
>>                  } else
>>                          sco_chan_del(sk, ECONNRESET);
>>                  break;
>> @@ -1084,21 +1098,26 @@ static void sco_conn_ready(struct sco_conn *conn)
>>
>>          if (sk) {
>>                  sco_sock_clear_timer(sk);
>> +               local_bh_disable();
>>                  bh_lock_sock(sk);
>>                  sk->sk_state = BT_CONNECTED;
>>                  sk->sk_state_change(sk);
>>                  bh_unlock_sock(sk);
>> +               local_bh_enable();
>>          } else {
>> +               local_bh_disable();
>>                  sco_conn_lock(conn);
>>
>>                  if (!conn->hcon) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>>                  parent = sco_get_sock_listen(&conn->hcon->src);
>>                  if (!parent) {
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1109,6 +1128,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  if (!sk) {
>>                          bh_unlock_sock(parent);
>>                          sco_conn_unlock(conn);
>> +                       local_bh_enable();
>>                          return;
>>                  }
>>
>> @@ -1131,6 +1151,7 @@ static void sco_conn_ready(struct sco_conn *conn)
>>                  bh_unlock_sock(parent);
>>
>>                  sco_conn_unlock(conn);
>> +               local_bh_enable();
>>          }
>>   }
>>
>> --
>> 2.25.1
>>
> 
>
diff mbox series

Patch

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3bd41563f118..34f3419c3330 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -140,10 +140,12 @@  static void sco_chan_del(struct sock *sk, int err)
 	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
 
 	if (conn) {
+		local_bh_disable();
 		sco_conn_lock(conn);
 		conn->sk = NULL;
 		sco_pi(sk)->conn = NULL;
 		sco_conn_unlock(conn);
+		local_bh_enable();
 
 		if (conn->hcon)
 			hci_conn_drop(conn->hcon);
@@ -167,16 +169,22 @@  static void sco_conn_del(struct hci_conn *hcon, int err)
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
 	/* Kill socket */
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (sk) {
 		sock_hold(sk);
+
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
 		bh_unlock_sock(sk);
+		local_bh_enable();
+
 		sco_sock_kill(sk);
 		sock_put(sk);
 	}
@@ -202,6 +210,7 @@  static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 {
 	int err = 0;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	if (conn->sk)
 		err = -EBUSY;
@@ -209,6 +218,7 @@  static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
 		__sco_chan_add(conn, sk, parent);
 
 	sco_conn_unlock(conn);
+	local_bh_enable();
 	return err;
 }
 
@@ -303,9 +313,11 @@  static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
 {
 	struct sock *sk;
 
+	local_bh_disable();
 	sco_conn_lock(conn);
 	sk = conn->sk;
 	sco_conn_unlock(conn);
+	local_bh_enable();
 
 	if (!sk)
 		goto drop;
@@ -420,10 +432,12 @@  static void __sco_sock_close(struct sock *sk)
 		if (sco_pi(sk)->conn->hcon) {
 			sk->sk_state = BT_DISCONN;
 			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
+			local_bh_disable();
 			sco_conn_lock(sco_pi(sk)->conn);
 			hci_conn_drop(sco_pi(sk)->conn->hcon);
 			sco_pi(sk)->conn->hcon = NULL;
 			sco_conn_unlock(sco_pi(sk)->conn);
+			local_bh_enable();
 		} else
 			sco_chan_del(sk, ECONNRESET);
 		break;
@@ -1084,21 +1098,26 @@  static void sco_conn_ready(struct sco_conn *conn)
 
 	if (sk) {
 		sco_sock_clear_timer(sk);
+		local_bh_disable();
 		bh_lock_sock(sk);
 		sk->sk_state = BT_CONNECTED;
 		sk->sk_state_change(sk);
 		bh_unlock_sock(sk);
+		local_bh_enable();
 	} else {
+		local_bh_disable();
 		sco_conn_lock(conn);
 
 		if (!conn->hcon) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
 		parent = sco_get_sock_listen(&conn->hcon->src);
 		if (!parent) {
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1109,6 +1128,7 @@  static void sco_conn_ready(struct sco_conn *conn)
 		if (!sk) {
 			bh_unlock_sock(parent);
 			sco_conn_unlock(conn);
+			local_bh_enable();
 			return;
 		}
 
@@ -1131,6 +1151,7 @@  static void sco_conn_ready(struct sco_conn *conn)
 		bh_unlock_sock(parent);
 
 		sco_conn_unlock(conn);
+		local_bh_enable();
 	}
 }