diff mbox series

[06/14] block: Add missing locking in bdrv_co_drain_bh_cb()

Message ID 20180907161520.26349-7-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show
Series Fix some jobs/drain/aio_poll related hangs | expand

Commit Message

Kevin Wolf Sept. 7, 2018, 4:15 p.m. UTC
bdrv_do_drained_begin/end() assume that they are called with the
AioContext lock of bs held. If we call drain functions from a coroutine
with the AioContext lock held, we yield and schedule a BH to move out of
coroutine context. This means that the lock for the home context of the
coroutine is released and must be re-acquired in the bottom half.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qemu/coroutine.h |  5 +++++
 block/io.c               | 15 +++++++++++++++
 util/qemu-coroutine.c    |  5 +++++
 3 files changed, 25 insertions(+)

Comments

Fam Zheng Sept. 11, 2018, 8:23 a.m. UTC | #1
On Fri, 09/07 18:15, Kevin Wolf wrote:
> bdrv_do_drained_begin/end() assume that they are called with the
> AioContext lock of bs held. If we call drain functions from a coroutine
> with the AioContext lock held, we yield and schedule a BH to move out of
> coroutine context. This means that the lock for the home context of the
> coroutine is released and must be re-acquired in the bottom half.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  include/qemu/coroutine.h |  5 +++++
>  block/io.c               | 15 +++++++++++++++
>  util/qemu-coroutine.c    |  5 +++++
>  3 files changed, 25 insertions(+)
> 
> diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> index 6f8a487041..9801e7f5a4 100644
> --- a/include/qemu/coroutine.h
> +++ b/include/qemu/coroutine.h
> @@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
>  void coroutine_fn qemu_coroutine_yield(void);
>  
>  /**
> + * Get the AioContext of the given coroutine
> + */
> +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
> +
> +/**
>   * Get the currently executing coroutine
>   */
>  Coroutine *coroutine_fn qemu_coroutine_self(void);
> diff --git a/block/io.c b/block/io.c
> index 7100344c7b..914ba78f1a 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
>      BlockDriverState *bs = data->bs;
>  
>      if (bs) {
> +        AioContext *ctx = bdrv_get_aio_context(bs);
> +        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
> +
> +        /*
> +         * When the coroutine yielded, the lock for its home context was
> +         * released, so we need to re-acquire it here. If it explicitly
> +         * acquired a different context, the lock is still held and we don't
> +         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
> +         */

This condition is rather obscure. When is ctx not equal to co_ctx?

> +        if (ctx == co_ctx) {
> +            aio_context_acquire(ctx);
> +        }
>          bdrv_dec_in_flight(bs);
>          if (data->begin) {
>              bdrv_do_drained_begin(bs, data->recursive, data->parent,
> @@ -296,6 +308,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
>              bdrv_do_drained_end(bs, data->recursive, data->parent,
>                                  data->ignore_bds_parents);
>          }
> +        if (ctx == co_ctx) {
> +            aio_context_release(ctx);
> +        }
>      } else {
>          assert(data->begin);
>          bdrv_drain_all_begin();
> diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
> index 1ba4191b84..2295928d33 100644
> --- a/util/qemu-coroutine.c
> +++ b/util/qemu-coroutine.c
> @@ -198,3 +198,8 @@ bool qemu_coroutine_entered(Coroutine *co)
>  {
>      return co->caller;
>  }
> +
> +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
> +{
> +    return co->ctx;
> +}
> -- 
> 2.13.6
>
Kevin Wolf Sept. 11, 2018, 9:17 a.m. UTC | #2
Am 11.09.2018 um 10:23 hat Fam Zheng geschrieben:
> On Fri, 09/07 18:15, Kevin Wolf wrote:
> > bdrv_do_drained_begin/end() assume that they are called with the
> > AioContext lock of bs held. If we call drain functions from a coroutine
> > with the AioContext lock held, we yield and schedule a BH to move out of
> > coroutine context. This means that the lock for the home context of the
> > coroutine is released and must be re-acquired in the bottom half.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  include/qemu/coroutine.h |  5 +++++
> >  block/io.c               | 15 +++++++++++++++
> >  util/qemu-coroutine.c    |  5 +++++
> >  3 files changed, 25 insertions(+)
> > 
> > diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> > index 6f8a487041..9801e7f5a4 100644
> > --- a/include/qemu/coroutine.h
> > +++ b/include/qemu/coroutine.h
> > @@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
> >  void coroutine_fn qemu_coroutine_yield(void);
> >  
> >  /**
> > + * Get the AioContext of the given coroutine
> > + */
> > +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
> > +
> > +/**
> >   * Get the currently executing coroutine
> >   */
> >  Coroutine *coroutine_fn qemu_coroutine_self(void);
> > diff --git a/block/io.c b/block/io.c
> > index 7100344c7b..914ba78f1a 100644
> > --- a/block/io.c
> > +++ b/block/io.c
> > @@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
> >      BlockDriverState *bs = data->bs;
> >  
> >      if (bs) {
> > +        AioContext *ctx = bdrv_get_aio_context(bs);
> > +        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
> > +
> > +        /*
> > +         * When the coroutine yielded, the lock for its home context was
> > +         * released, so we need to re-acquire it here. If it explicitly
> > +         * acquired a different context, the lock is still held and we don't
> > +         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
> > +         */
> 
> This condition is rather obscure. When is ctx not equal to co_ctx?

Whenever you drain a BlockDriverState that is in a different AioContext.
The common case is a bdrv_drain() from the main loop thread for a BDS in
an iothread.

I didn't have this condition at first and ran into deadlocks (because
AIO_WAIT_WHILE() dropped the lock only once, but it was locked twice).

Kevin

> > +        if (ctx == co_ctx) {
> > +            aio_context_acquire(ctx);
> > +        }
> >          bdrv_dec_in_flight(bs);
> >          if (data->begin) {
> >              bdrv_do_drained_begin(bs, data->recursive, data->parent,
> > @@ -296,6 +308,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
> >              bdrv_do_drained_end(bs, data->recursive, data->parent,
> >                                  data->ignore_bds_parents);
> >          }
> > +        if (ctx == co_ctx) {
> > +            aio_context_release(ctx);
> > +        }
> >      } else {
> >          assert(data->begin);
> >          bdrv_drain_all_begin();
Sergio Lopez Sept. 11, 2018, 9:28 a.m. UTC | #3
On Tue, Sep 11, 2018 at 11:17:20AM +0200, Kevin Wolf wrote:
> Am 11.09.2018 um 10:23 hat Fam Zheng geschrieben:
> > On Fri, 09/07 18:15, Kevin Wolf wrote:
> > > bdrv_do_drained_begin/end() assume that they are called with the
> > > AioContext lock of bs held. If we call drain functions from a coroutine
> > > with the AioContext lock held, we yield and schedule a BH to move out of
> > > coroutine context. This means that the lock for the home context of the
> > > coroutine is released and must be re-acquired in the bottom half.
> > > 
> > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > ---
> > >  include/qemu/coroutine.h |  5 +++++
> > >  block/io.c               | 15 +++++++++++++++
> > >  util/qemu-coroutine.c    |  5 +++++
> > >  3 files changed, 25 insertions(+)
> > > 
> > > diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> > > index 6f8a487041..9801e7f5a4 100644
> > > --- a/include/qemu/coroutine.h
> > > +++ b/include/qemu/coroutine.h
> > > @@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
> > >  void coroutine_fn qemu_coroutine_yield(void);
> > >  
> > >  /**
> > > + * Get the AioContext of the given coroutine
> > > + */
> > > +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
> > > +
> > > +/**
> > >   * Get the currently executing coroutine
> > >   */
> > >  Coroutine *coroutine_fn qemu_coroutine_self(void);
> > > diff --git a/block/io.c b/block/io.c
> > > index 7100344c7b..914ba78f1a 100644
> > > --- a/block/io.c
> > > +++ b/block/io.c
> > > @@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
> > >      BlockDriverState *bs = data->bs;
> > >  
> > >      if (bs) {
> > > +        AioContext *ctx = bdrv_get_aio_context(bs);
> > > +        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
> > > +
> > > +        /*
> > > +         * When the coroutine yielded, the lock for its home context was
> > > +         * released, so we need to re-acquire it here. If it explicitly
> > > +         * acquired a different context, the lock is still held and we don't
> > > +         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
> > > +         */
> > 
> > This condition is rather obscure. When is ctx not equal to co_ctx?
> 
> Whenever you drain a BlockDriverState that is in a different AioContext.
> The common case is a bdrv_drain() from the main loop thread for a BDS in
> an iothread.

Isn't this a consequence of using qemu_coroutine_enter in co_schedule_bh
[1]?

AFAIK, even if an IOThread's AioContext is being polled by the main loop
thread, all coroutines should be running with the IOThread/BDS
AioContext.

Sergio.

[1] https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg00450.html
Kevin Wolf Sept. 11, 2018, 10:22 a.m. UTC | #4
Am 11.09.2018 um 11:28 hat Sergio Lopez geschrieben:
> On Tue, Sep 11, 2018 at 11:17:20AM +0200, Kevin Wolf wrote:
> > Am 11.09.2018 um 10:23 hat Fam Zheng geschrieben:
> > > On Fri, 09/07 18:15, Kevin Wolf wrote:
> > > > bdrv_do_drained_begin/end() assume that they are called with the
> > > > AioContext lock of bs held. If we call drain functions from a coroutine
> > > > with the AioContext lock held, we yield and schedule a BH to move out of
> > > > coroutine context. This means that the lock for the home context of the
> > > > coroutine is released and must be re-acquired in the bottom half.
> > > > 
> > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > > ---
> > > >  include/qemu/coroutine.h |  5 +++++
> > > >  block/io.c               | 15 +++++++++++++++
> > > >  util/qemu-coroutine.c    |  5 +++++
> > > >  3 files changed, 25 insertions(+)
> > > > 
> > > > diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
> > > > index 6f8a487041..9801e7f5a4 100644
> > > > --- a/include/qemu/coroutine.h
> > > > +++ b/include/qemu/coroutine.h
> > > > @@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
> > > >  void coroutine_fn qemu_coroutine_yield(void);
> > > >  
> > > >  /**
> > > > + * Get the AioContext of the given coroutine
> > > > + */
> > > > +AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
> > > > +
> > > > +/**
> > > >   * Get the currently executing coroutine
> > > >   */
> > > >  Coroutine *coroutine_fn qemu_coroutine_self(void);
> > > > diff --git a/block/io.c b/block/io.c
> > > > index 7100344c7b..914ba78f1a 100644
> > > > --- a/block/io.c
> > > > +++ b/block/io.c
> > > > @@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
> > > >      BlockDriverState *bs = data->bs;
> > > >  
> > > >      if (bs) {
> > > > +        AioContext *ctx = bdrv_get_aio_context(bs);
> > > > +        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
> > > > +
> > > > +        /*
> > > > +         * When the coroutine yielded, the lock for its home context was
> > > > +         * released, so we need to re-acquire it here. If it explicitly
> > > > +         * acquired a different context, the lock is still held and we don't
> > > > +         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
> > > > +         */
> > > 
> > > This condition is rather obscure. When is ctx not equal to co_ctx?
> > 
> > Whenever you drain a BlockDriverState that is in a different AioContext.
> > The common case is a bdrv_drain() from the main loop thread for a BDS in
> > an iothread.
> 
> Isn't this a consequence of using qemu_coroutine_enter in co_schedule_bh
> [1]?
> 
> AFAIK, even if an IOThread's AioContext is being polled by the main loop
> thread, all coroutines should be running with the IOThread/BDS
> AioContext.

You're right, bdrv_co_yield_to_drain() does schedule the BH in the
AioContext of the BDS, so in theory this shouldn't happen. If it was
called from a coroutine with a wrong co->ctx (due to the bug you
mentioned), that would explain the behaviour. Maybe the condition isn't
necessary any more after your fix.

Kevin
diff mbox series

Patch

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 6f8a487041..9801e7f5a4 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -90,6 +90,11 @@  void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
 void coroutine_fn qemu_coroutine_yield(void);
 
 /**
+ * Get the AioContext of the given coroutine
+ */
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
+
+/**
  * Get the currently executing coroutine
  */
 Coroutine *coroutine_fn qemu_coroutine_self(void);
diff --git a/block/io.c b/block/io.c
index 7100344c7b..914ba78f1a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -288,6 +288,18 @@  static void bdrv_co_drain_bh_cb(void *opaque)
     BlockDriverState *bs = data->bs;
 
     if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
+
+        /*
+         * When the coroutine yielded, the lock for its home context was
+         * released, so we need to re-acquire it here. If it explicitly
+         * acquired a different context, the lock is still held and we don't
+         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
+         */
+        if (ctx == co_ctx) {
+            aio_context_acquire(ctx);
+        }
         bdrv_dec_in_flight(bs);
         if (data->begin) {
             bdrv_do_drained_begin(bs, data->recursive, data->parent,
@@ -296,6 +308,9 @@  static void bdrv_co_drain_bh_cb(void *opaque)
             bdrv_do_drained_end(bs, data->recursive, data->parent,
                                 data->ignore_bds_parents);
         }
+        if (ctx == co_ctx) {
+            aio_context_release(ctx);
+        }
     } else {
         assert(data->begin);
         bdrv_drain_all_begin();
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 1ba4191b84..2295928d33 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -198,3 +198,8 @@  bool qemu_coroutine_entered(Coroutine *co)
 {
     return co->caller;
 }
+
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
+{
+    return co->ctx;
+}