diff mbox series

[v2,1/5] block/nbd: Fix hang in .bdrv_close()

Message ID 20200122164532.178040-2-mreitz@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: Generic file creation fallback | expand

Commit Message

Max Reitz Jan. 22, 2020, 4:45 p.m. UTC
When nbd_close() is called from a coroutine, the connection_co never
gets to run, and thus nbd_teardown_connection() hangs.

This is because aio_co_enter() only puts the connection_co into the main
coroutine's wake-up queue, so this main coroutine needs to yield and
wait for connection_co to terminate.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/nbd.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

Comments

Eric Blake Jan. 22, 2020, 6:55 p.m. UTC | #1
On 1/22/20 10:45 AM, Max Reitz wrote:
> When nbd_close() is called from a coroutine, the connection_co never
> gets to run, and thus nbd_teardown_connection() hangs.
> 
> This is because aio_co_enter() only puts the connection_co into the main
> coroutine's wake-up queue, so this main coroutine needs to yield and
> wait for connection_co to terminate.
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>   block/nbd.c | 14 +++++++++++++-
>   1 file changed, 13 insertions(+), 1 deletion(-)

Reviewed-by: Eric Blake <eblake@redhat.com>

I'm assuming it is better to take the entire series through somewhere 
other than my NBD tree, since the remaining patches touch more than NBD?
Maxim Levitsky Jan. 23, 2020, 10:54 p.m. UTC | #2
On Wed, 2020-01-22 at 17:45 +0100, Max Reitz wrote:
> When nbd_close() is called from a coroutine, the connection_co never
> gets to run, and thus nbd_teardown_connection() hangs.
> 
> This is because aio_co_enter() only puts the connection_co into the main
> coroutine's wake-up queue, so this main coroutine needs to yield and
> wait for connection_co to terminate.

Took me a while to understand this back then, good that I explained
my thoughts in the review comment, although the commit message
is alright when you already understand qemu co-routines I guess.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/nbd.c | 14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index d085554f21..6d3b22f844 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -70,6 +70,7 @@ typedef struct BDRVNBDState {
>      CoMutex send_mutex;
>      CoQueue free_sema;
>      Coroutine *connection_co;
> +    Coroutine *teardown_co;
>      QemuCoSleepState *connection_co_sleep_ns_state;
>      bool drained;
>      bool wait_drained_end;
> @@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs)
>              qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
>          }
>      }
> -    BDRV_POLL_WHILE(bs, s->connection_co);
> +    if (qemu_in_coroutine()) {
> +        s->teardown_co = qemu_coroutine_self();
> +        /* connection_co resumes us when it terminates */
> +        qemu_coroutine_yield();
> +        s->teardown_co = NULL;
> +    } else {
> +        BDRV_POLL_WHILE(bs, s->connection_co);
> +    }
> +    assert(!s->connection_co);
>  }
>  
>  static bool nbd_client_connecting(BDRVNBDState *s)
> @@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
>          s->ioc = NULL;
>      }
>  
> +    if (s->teardown_co) {
> +        aio_co_wake(s->teardown_co);
> +    }
>      aio_wait_kick();
>  }
>
diff mbox series

Patch

diff --git a/block/nbd.c b/block/nbd.c
index d085554f21..6d3b22f844 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -70,6 +70,7 @@  typedef struct BDRVNBDState {
     CoMutex send_mutex;
     CoQueue free_sema;
     Coroutine *connection_co;
+    Coroutine *teardown_co;
     QemuCoSleepState *connection_co_sleep_ns_state;
     bool drained;
     bool wait_drained_end;
@@ -203,7 +204,15 @@  static void nbd_teardown_connection(BlockDriverState *bs)
             qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
         }
     }
-    BDRV_POLL_WHILE(bs, s->connection_co);
+    if (qemu_in_coroutine()) {
+        s->teardown_co = qemu_coroutine_self();
+        /* connection_co resumes us when it terminates */
+        qemu_coroutine_yield();
+        s->teardown_co = NULL;
+    } else {
+        BDRV_POLL_WHILE(bs, s->connection_co);
+    }
+    assert(!s->connection_co);
 }
 
 static bool nbd_client_connecting(BDRVNBDState *s)
@@ -395,6 +404,9 @@  static coroutine_fn void nbd_connection_entry(void *opaque)
         s->ioc = NULL;
     }
 
+    if (s->teardown_co) {
+        aio_co_wake(s->teardown_co);
+    }
     aio_wait_kick();
 }