diff mbox

[PULL,21/31] block: Avoid bs->blk in bdrv_next()

Message ID 1463671329-22655-22-git-send-email-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kevin Wolf May 19, 2016, 3:21 p.m. UTC
We need to introduce a separate BdrvNextIterator struct that can keep
more state than just the current BDS in order to avoid using the bs->blk
pointer.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c                        | 40 +++++++----------------
 block/block-backend.c          | 72 +++++++++++++++++++++++++++++-------------
 block/io.c                     | 13 ++++----
 block/snapshot.c               | 30 +++++++++++-------
 blockdev.c                     |  3 +-
 include/block/block.h          |  3 +-
 include/sysemu/block-backend.h |  1 -
 migration/block.c              |  4 ++-
 monitor.c                      |  6 ++--
 qmp.c                          |  5 ++-
 10 files changed, 102 insertions(+), 75 deletions(-)

Comments

Paolo Bonzini May 20, 2016, 7:54 a.m. UTC | #1
On 19/05/2016 17:21, Kevin Wolf wrote:
> We need to introduce a separate BdrvNextIterator struct that can keep
> more state than just the current BDS in order to avoid using the bs->blk
> pointer.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> ---
>  block.c                        | 40 +++++++----------------
>  block/block-backend.c          | 72 +++++++++++++++++++++++++++++-------------
>  block/io.c                     | 13 ++++----
>  block/snapshot.c               | 30 +++++++++++-------
>  blockdev.c                     |  3 +-
>  include/block/block.h          |  3 +-
>  include/sysemu/block-backend.h |  1 -
>  migration/block.c              |  4 ++-
>  monitor.c                      |  6 ++--
>  qmp.c                          |  5 ++-
>  10 files changed, 102 insertions(+), 75 deletions(-)
> 
> diff --git a/block.c b/block.c
> index fd4cf81..91bf431 100644
> --- a/block.c
> +++ b/block.c
> @@ -2870,25 +2870,6 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
>      return QTAILQ_NEXT(bs, node_list);
>  }
>  
> -/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
> - * the monitor or attached to a BlockBackend */
> -BlockDriverState *bdrv_next(BlockDriverState *bs)
> -{
> -    if (!bs || bs->blk) {
> -        bs = blk_next_root_bs(bs);
> -        if (bs) {
> -            return bs;
> -        }
> -    }
> -
> -    /* Ignore all BDSs that are attached to a BlockBackend here; they have been
> -     * handled by the above block already */
> -    do {
> -        bs = bdrv_next_monitor_owned(bs);
> -    } while (bs && bs->blk);
> -    return bs;
> -}
> -
>  const char *bdrv_get_node_name(const BlockDriverState *bs)
>  {
>      return bs->node_name;
> @@ -3220,10 +3201,11 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
>  
>  void bdrv_invalidate_cache_all(Error **errp)
>  {
> -    BlockDriverState *bs = NULL;
> +    BlockDriverState *bs;
>      Error *local_err = NULL;
> +    BdrvNextIterator *it = NULL;
>  
> -    while ((bs = bdrv_next(bs)) != NULL) {
> +    while ((it = bdrv_next(it, &bs)) != NULL) {
>          AioContext *aio_context = bdrv_get_aio_context(bs);
>  
>          aio_context_acquire(aio_context);
> @@ -3265,10 +3247,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
>  int bdrv_inactivate_all(void)
>  {
>      BlockDriverState *bs = NULL;
> +    BdrvNextIterator *it = NULL;
>      int ret = 0;
>      int pass;
>  
> -    while ((bs = bdrv_next(bs)) != NULL) {
> +    while ((it = bdrv_next(it, &bs)) != NULL) {
>          aio_context_acquire(bdrv_get_aio_context(bs));
>      }
>  
> @@ -3277,8 +3260,8 @@ int bdrv_inactivate_all(void)
>       * the second pass sets the BDRV_O_INACTIVE flag so that no further write
>       * is allowed. */
>      for (pass = 0; pass < 2; pass++) {
> -        bs = NULL;
> -        while ((bs = bdrv_next(bs)) != NULL) {
> +        it = NULL;
> +        while ((it = bdrv_next(it, &bs)) != NULL) {
>              ret = bdrv_inactivate_recurse(bs, pass);
>              if (ret < 0) {
>                  goto out;
> @@ -3287,8 +3270,8 @@ int bdrv_inactivate_all(void)
>      }
>  
>  out:
> -    bs = NULL;
> -    while ((bs = bdrv_next(bs)) != NULL) {
> +    it = NULL;
> +    while ((it = bdrv_next(it, &bs)) != NULL) {
>          aio_context_release(bdrv_get_aio_context(bs));
>      }
>  
> @@ -3781,10 +3764,11 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
>   */
>  bool bdrv_is_first_non_filter(BlockDriverState *candidate)
>  {
> -    BlockDriverState *bs = NULL;
> +    BlockDriverState *bs;
> +    BdrvNextIterator *it = NULL;
>  
>      /* walk down the bs forest recursively */
> -    while ((bs = bdrv_next(bs)) != NULL) {
> +    while ((it = bdrv_next(it, &bs)) != NULL) {
>          bool perm;
>  
>          /* try to recurse in this top level bs */
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 9dcac97..7f2eeb0 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -75,6 +75,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
>  };
>  
>  static void drive_info_del(DriveInfo *dinfo);
> +static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
>  
>  /* All BlockBackends */
>  static QTAILQ_HEAD(, BlockBackend) block_backends =
> @@ -286,28 +287,50 @@ BlockBackend *blk_next(BlockBackend *blk)
>                 : QTAILQ_FIRST(&monitor_block_backends);
>  }
>  
> -/*
> - * Iterates over all BlockDriverStates which are attached to a BlockBackend.
> - * This function is for use by bdrv_next().
> - *
> - * @bs must be NULL or a BDS that is attached to a BB.
> - */
> -BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
> -{
> +struct BdrvNextIterator {
> +    enum {
> +        BDRV_NEXT_BACKEND_ROOTS,
> +        BDRV_NEXT_MONITOR_OWNED,
> +    } phase;
>      BlockBackend *blk;
> +    BlockDriverState *bs;
> +};
>  
> -    if (bs) {
> -        assert(bs->blk);
> -        blk = bs->blk;
> -    } else {
> -        blk = NULL;
> +/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
> + * the monitor or attached to a BlockBackend */
> +BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
> +{
> +    if (!it) {
> +        it = g_new(BdrvNextIterator, 1);

Hmm, who frees it?  (Especially if the caller exits the loop
prematurely, which means you cannot just free it before returning NULL).
 I think it's better to:

- allocate the iterator on the stack and make bdrv_next return a BDS *

- and add a bdrv_first function that does this:

> +        *it = (BdrvNextIterator) {
> +            .phase = BDRV_NEXT_BACKEND_ROOTS,
> +        };

and then returns bdrv_next(it);

- if desirable add a macro that abstracts the calls to bdrv_first and
bdrv_next.

Thanks,

Paolo
Kevin Wolf May 20, 2016, 8:05 a.m. UTC | #2
Am 20.05.2016 um 09:54 hat Paolo Bonzini geschrieben:
> 
> 
> On 19/05/2016 17:21, Kevin Wolf wrote:
> > We need to introduce a separate BdrvNextIterator struct that can keep
> > more state than just the current BDS in order to avoid using the bs->blk
> > pointer.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > ---
> >  block.c                        | 40 +++++++----------------
> >  block/block-backend.c          | 72 +++++++++++++++++++++++++++++-------------
> >  block/io.c                     | 13 ++++----
> >  block/snapshot.c               | 30 +++++++++++-------
> >  blockdev.c                     |  3 +-
> >  include/block/block.h          |  3 +-
> >  include/sysemu/block-backend.h |  1 -
> >  migration/block.c              |  4 ++-
> >  monitor.c                      |  6 ++--
> >  qmp.c                          |  5 ++-
> >  10 files changed, 102 insertions(+), 75 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index fd4cf81..91bf431 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -2870,25 +2870,6 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
> >      return QTAILQ_NEXT(bs, node_list);
> >  }
> >  
> > -/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
> > - * the monitor or attached to a BlockBackend */
> > -BlockDriverState *bdrv_next(BlockDriverState *bs)
> > -{
> > -    if (!bs || bs->blk) {
> > -        bs = blk_next_root_bs(bs);
> > -        if (bs) {
> > -            return bs;
> > -        }
> > -    }
> > -
> > -    /* Ignore all BDSs that are attached to a BlockBackend here; they have been
> > -     * handled by the above block already */
> > -    do {
> > -        bs = bdrv_next_monitor_owned(bs);
> > -    } while (bs && bs->blk);
> > -    return bs;
> > -}
> > -
> >  const char *bdrv_get_node_name(const BlockDriverState *bs)
> >  {
> >      return bs->node_name;
> > @@ -3220,10 +3201,11 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
> >  
> >  void bdrv_invalidate_cache_all(Error **errp)
> >  {
> > -    BlockDriverState *bs = NULL;
> > +    BlockDriverState *bs;
> >      Error *local_err = NULL;
> > +    BdrvNextIterator *it = NULL;
> >  
> > -    while ((bs = bdrv_next(bs)) != NULL) {
> > +    while ((it = bdrv_next(it, &bs)) != NULL) {
> >          AioContext *aio_context = bdrv_get_aio_context(bs);
> >  
> >          aio_context_acquire(aio_context);
> > @@ -3265,10 +3247,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
> >  int bdrv_inactivate_all(void)
> >  {
> >      BlockDriverState *bs = NULL;
> > +    BdrvNextIterator *it = NULL;
> >      int ret = 0;
> >      int pass;
> >  
> > -    while ((bs = bdrv_next(bs)) != NULL) {
> > +    while ((it = bdrv_next(it, &bs)) != NULL) {
> >          aio_context_acquire(bdrv_get_aio_context(bs));
> >      }
> >  
> > @@ -3277,8 +3260,8 @@ int bdrv_inactivate_all(void)
> >       * the second pass sets the BDRV_O_INACTIVE flag so that no further write
> >       * is allowed. */
> >      for (pass = 0; pass < 2; pass++) {
> > -        bs = NULL;
> > -        while ((bs = bdrv_next(bs)) != NULL) {
> > +        it = NULL;
> > +        while ((it = bdrv_next(it, &bs)) != NULL) {
> >              ret = bdrv_inactivate_recurse(bs, pass);
> >              if (ret < 0) {
> >                  goto out;
> > @@ -3287,8 +3270,8 @@ int bdrv_inactivate_all(void)
> >      }
> >  
> >  out:
> > -    bs = NULL;
> > -    while ((bs = bdrv_next(bs)) != NULL) {
> > +    it = NULL;
> > +    while ((it = bdrv_next(it, &bs)) != NULL) {
> >          aio_context_release(bdrv_get_aio_context(bs));
> >      }
> >  
> > @@ -3781,10 +3764,11 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
> >   */
> >  bool bdrv_is_first_non_filter(BlockDriverState *candidate)
> >  {
> > -    BlockDriverState *bs = NULL;
> > +    BlockDriverState *bs;
> > +    BdrvNextIterator *it = NULL;
> >  
> >      /* walk down the bs forest recursively */
> > -    while ((bs = bdrv_next(bs)) != NULL) {
> > +    while ((it = bdrv_next(it, &bs)) != NULL) {
> >          bool perm;
> >  
> >          /* try to recurse in this top level bs */
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index 9dcac97..7f2eeb0 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -75,6 +75,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
> >  };
> >  
> >  static void drive_info_del(DriveInfo *dinfo);
> > +static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
> >  
> >  /* All BlockBackends */
> >  static QTAILQ_HEAD(, BlockBackend) block_backends =
> > @@ -286,28 +287,50 @@ BlockBackend *blk_next(BlockBackend *blk)
> >                 : QTAILQ_FIRST(&monitor_block_backends);
> >  }
> >  
> > -/*
> > - * Iterates over all BlockDriverStates which are attached to a BlockBackend.
> > - * This function is for use by bdrv_next().
> > - *
> > - * @bs must be NULL or a BDS that is attached to a BB.
> > - */
> > -BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
> > -{
> > +struct BdrvNextIterator {
> > +    enum {
> > +        BDRV_NEXT_BACKEND_ROOTS,
> > +        BDRV_NEXT_MONITOR_OWNED,
> > +    } phase;
> >      BlockBackend *blk;
> > +    BlockDriverState *bs;
> > +};
> >  
> > -    if (bs) {
> > -        assert(bs->blk);
> > -        blk = bs->blk;
> > -    } else {
> > -        blk = NULL;
> > +/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
> > + * the monitor or attached to a BlockBackend */
> > +BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
> > +{
> > +    if (!it) {
> > +        it = g_new(BdrvNextIterator, 1);
> 
> Hmm, who frees it?  (Especially if the caller exits the loop
> prematurely, which means you cannot just free it before returning NULL).
>  I think it's better to:
> 
> - allocate the iterator on the stack and make bdrv_next return a BDS *
> 
> - and add a bdrv_first function that does this:
> 
> > +        *it = (BdrvNextIterator) {
> > +            .phase = BDRV_NEXT_BACKEND_ROOTS,
> > +        };
> 
> and then returns bdrv_next(it);
> 
> - if desirable add a macro that abstracts the calls to bdrv_first and
> bdrv_next.

Already posted a fix. I chose to keep the interface and free the
BdrvNextIterator inside bdrv_next(), when we return NULL after the last
element.

Kevin
Kevin Wolf May 20, 2016, 8:10 a.m. UTC | #3
Am 20.05.2016 um 10:05 hat Kevin Wolf geschrieben:
> Am 20.05.2016 um 09:54 hat Paolo Bonzini geschrieben:
> > > +/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
> > > + * the monitor or attached to a BlockBackend */
> > > +BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
> > > +{
> > > +    if (!it) {
> > > +        it = g_new(BdrvNextIterator, 1);
> > 
> > Hmm, who frees it?  (Especially if the caller exits the loop
> > prematurely, which means you cannot just free it before returning NULL).
> >  I think it's better to:
> > 
> > - allocate the iterator on the stack and make bdrv_next return a BDS *
> > 
> > - and add a bdrv_first function that does this:
> > 
> > > +        *it = (BdrvNextIterator) {
> > > +            .phase = BDRV_NEXT_BACKEND_ROOTS,
> > > +        };
> > 
> > and then returns bdrv_next(it);
> > 
> > - if desirable add a macro that abstracts the calls to bdrv_first and
> > bdrv_next.
> 
> Already posted a fix. I chose to keep the interface and free the
> BdrvNextIterator inside bdrv_next(), when we return NULL after the last
> element.

Oops, should have actually read your email... You're right about callers
that prematurely exit the loop, of course.

I still don't really like first/next interfaces, though. Perhaps start
the iteration with bs == NULL instead of it == NULL?

Kevin
Paolo Bonzini May 20, 2016, 9:39 a.m. UTC | #4
On 20/05/2016 10:10, Kevin Wolf wrote:
>> > Already posted a fix. I chose to keep the interface and free the
>> > BdrvNextIterator inside bdrv_next(), when we return NULL after the last
>> > element.
> Oops, should have actually read your email... You're right about callers
> that prematurely exit the loop, of course.
> 
> I still don't really like first/next interfaces, though. Perhaps start
> the iteration with bs == NULL instead of it == NULL?

Yet another alternative is to add a BDRV_NEXT_ITERATOR_INITIALIZER
macro.  I like it because it's less magic than "x is NULL" and because I
would prefer an interface with just the BdrvNextIterator* as the
argument to bdrv_next.

Thanks,

Paolo
Kevin Wolf May 20, 2016, 10:26 a.m. UTC | #5
Am 20.05.2016 um 11:39 hat Paolo Bonzini geschrieben:
> 
> 
> On 20/05/2016 10:10, Kevin Wolf wrote:
> >> > Already posted a fix. I chose to keep the interface and free the
> >> > BdrvNextIterator inside bdrv_next(), when we return NULL after the last
> >> > element.
> > Oops, should have actually read your email... You're right about callers
> > that prematurely exit the loop, of course.
> > 
> > I still don't really like first/next interfaces, though. Perhaps start
> > the iteration with bs == NULL instead of it == NULL?
> 
> Yet another alternative is to add a BDRV_NEXT_ITERATOR_INITIALIZER
> macro.  I like it because it's less magic than "x is NULL" and because I
> would prefer an interface with just the BdrvNextIterator* as the
> argument to bdrv_next.

Hm, we have a few instances where an iterator variable is used for
multiple loops, so we need to be able to use it in an assignment, i.e.
it should be a compound literal. On the other hand, I seem to remember
that compound literals can't be used as initialisers.

Maybe a bdrv_next_iterator_reset() function then? Which would be like
first/next, except that it doesn't return the first value yet.

Kevin
Paolo Bonzini May 20, 2016, 10:46 a.m. UTC | #6
On 20/05/2016 12:26, Kevin Wolf wrote:
> Hm, we have a few instances where an iterator variable is used for
> multiple loops, so we need to be able to use it in an assignment, i.e.
> it should be a compound literal. On the other hand, I seem to remember
> that compound literals can't be used as initialisers.
> 
> Maybe a bdrv_next_iterator_reset() function then? Which would be like
> first/next, except that it doesn't return the first value yet.

What qemu/queue.h does is is provide both FOO_INITIALIZER and
FOO_INIT(&some_foo).

Thanks,

Paolo
diff mbox

Patch

diff --git a/block.c b/block.c
index fd4cf81..91bf431 100644
--- a/block.c
+++ b/block.c
@@ -2870,25 +2870,6 @@  BlockDriverState *bdrv_next_node(BlockDriverState *bs)
     return QTAILQ_NEXT(bs, node_list);
 }
 
-/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
- * the monitor or attached to a BlockBackend */
-BlockDriverState *bdrv_next(BlockDriverState *bs)
-{
-    if (!bs || bs->blk) {
-        bs = blk_next_root_bs(bs);
-        if (bs) {
-            return bs;
-        }
-    }
-
-    /* Ignore all BDSs that are attached to a BlockBackend here; they have been
-     * handled by the above block already */
-    do {
-        bs = bdrv_next_monitor_owned(bs);
-    } while (bs && bs->blk);
-    return bs;
-}
-
 const char *bdrv_get_node_name(const BlockDriverState *bs)
 {
     return bs->node_name;
@@ -3220,10 +3201,11 @@  void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 
 void bdrv_invalidate_cache_all(Error **errp)
 {
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
     Error *local_err = NULL;
+    BdrvNextIterator *it = NULL;
 
-    while ((bs = bdrv_next(bs)) != NULL) {
+    while ((it = bdrv_next(it, &bs)) != NULL) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -3265,10 +3247,11 @@  static int bdrv_inactivate_recurse(BlockDriverState *bs,
 int bdrv_inactivate_all(void)
 {
     BlockDriverState *bs = NULL;
+    BdrvNextIterator *it = NULL;
     int ret = 0;
     int pass;
 
-    while ((bs = bdrv_next(bs)) != NULL) {
+    while ((it = bdrv_next(it, &bs)) != NULL) {
         aio_context_acquire(bdrv_get_aio_context(bs));
     }
 
@@ -3277,8 +3260,8 @@  int bdrv_inactivate_all(void)
      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
      * is allowed. */
     for (pass = 0; pass < 2; pass++) {
-        bs = NULL;
-        while ((bs = bdrv_next(bs)) != NULL) {
+        it = NULL;
+        while ((it = bdrv_next(it, &bs)) != NULL) {
             ret = bdrv_inactivate_recurse(bs, pass);
             if (ret < 0) {
                 goto out;
@@ -3287,8 +3270,8 @@  int bdrv_inactivate_all(void)
     }
 
 out:
-    bs = NULL;
-    while ((bs = bdrv_next(bs)) != NULL) {
+    it = NULL;
+    while ((it = bdrv_next(it, &bs)) != NULL) {
         aio_context_release(bdrv_get_aio_context(bs));
     }
 
@@ -3781,10 +3764,11 @@  bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
  */
 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 {
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
     /* walk down the bs forest recursively */
-    while ((bs = bdrv_next(bs)) != NULL) {
+    while ((it = bdrv_next(it, &bs)) != NULL) {
         bool perm;
 
         /* try to recurse in this top level bs */
diff --git a/block/block-backend.c b/block/block-backend.c
index 9dcac97..7f2eeb0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -75,6 +75,7 @@  static const AIOCBInfo block_backend_aiocb_info = {
 };
 
 static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
 
 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -286,28 +287,50 @@  BlockBackend *blk_next(BlockBackend *blk)
                : QTAILQ_FIRST(&monitor_block_backends);
 }
 
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
-{
+struct BdrvNextIterator {
+    enum {
+        BDRV_NEXT_BACKEND_ROOTS,
+        BDRV_NEXT_MONITOR_OWNED,
+    } phase;
     BlockBackend *blk;
+    BlockDriverState *bs;
+};
 
-    if (bs) {
-        assert(bs->blk);
-        blk = bs->blk;
-    } else {
-        blk = NULL;
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
+{
+    if (!it) {
+        it = g_new(BdrvNextIterator, 1);
+        *it = (BdrvNextIterator) {
+            .phase = BDRV_NEXT_BACKEND_ROOTS,
+        };
+    }
+
+    /* First, return all root nodes of BlockBackends. In order to avoid
+     * returning a BDS twice when multiple BBs refer to it, we only return it
+     * if the BB is the first one in the parent list of the BDS. */
+    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+        do {
+            it->blk = blk_all_next(it->blk);
+            *bs = it->blk ? blk_bs(it->blk) : NULL;
+        } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));
+
+        if (*bs) {
+            return it;
+        }
+        it->phase = BDRV_NEXT_MONITOR_OWNED;
     }
 
+    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+     * BDSes that are attached to a BlockBackend here; they have been handled
+     * by the above block already */
     do {
-        blk = blk_all_next(blk);
-    } while (blk && !blk->root);
+        it->bs = bdrv_next_monitor_owned(it->bs);
+        *bs = it->bs;
+    } while (*bs && bdrv_has_blk(*bs));
 
-    return blk ? blk->root->bs : NULL;
+    return *bs ? it : NULL;
 }
 
 /*
@@ -394,21 +417,26 @@  BlockDriverState *blk_bs(BlockBackend *blk)
     return blk->root ? blk->root->bs : NULL;
 }
 
-/*
- * Returns true if @bs has an associated BlockBackend.
- */
-bool bdrv_has_blk(BlockDriverState *bs)
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
 {
     BdrvChild *child;
     QLIST_FOREACH(child, &bs->parents, next_parent) {
         if (child->role == &child_root) {
             assert(bs->blk);
-            return true;
+            return child->opaque;
         }
     }
 
     assert(!bs->blk);
-    return false;
+    return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+    return bdrv_first_blk(bs) != NULL;
 }
 
 /*
diff --git a/block/io.c b/block/io.c
index 8a6f470..60a6bd8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -270,10 +270,11 @@  void bdrv_drain_all(void)
 {
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
     GSList *aio_ctxs = NULL, *ctx;
 
-    while ((bs = bdrv_next(bs))) {
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -301,10 +302,10 @@  void bdrv_drain_all(void)
 
         for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
             AioContext *aio_context = ctx->data;
-            bs = NULL;
+            it = NULL;
 
             aio_context_acquire(aio_context);
-            while ((bs = bdrv_next(bs))) {
+            while ((it = bdrv_next(it, &bs))) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
                     if (bdrv_requests_pending(bs)) {
                         busy = true;
@@ -317,8 +318,8 @@  void bdrv_drain_all(void)
         }
     }
 
-    bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    it = NULL;
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
diff --git a/block/snapshot.c b/block/snapshot.c
index e9d721d..3917ec5 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -373,9 +373,10 @@  int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
 bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
 {
     bool ok = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (ok && (bs = bdrv_next(bs))) {
+    while (ok && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -393,10 +394,11 @@  int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
                              Error **err)
 {
     int ret = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
     QEMUSnapshotInfo sn1, *snapshot = &sn1;
 
-    while (ret == 0 && (bs = bdrv_next(bs))) {
+    while (ret == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -415,9 +417,10 @@  int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
 int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -435,9 +438,10 @@  int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     QEMUSnapshotInfo sn;
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -457,9 +461,10 @@  int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
                              BlockDriverState **first_bad_bs)
 {
     int err = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (err == 0 && (bs = bdrv_next(bs))) {
+    while (err == 0 && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -480,9 +485,10 @@  int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
 BlockDriverState *bdrv_all_find_vmstate_bs(void)
 {
     bool not_found = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    while (not_found && (bs = bdrv_next(bs))) {
+    while (not_found && (it = bdrv_next(it, &bs))) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
diff --git a/blockdev.c b/blockdev.c
index 03ddd3a..442ca8d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -4135,8 +4135,9 @@  BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
     BlockJobInfoList *head = NULL, **p_next = &head;
     BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
diff --git a/include/block/block.h b/include/block/block.h
index d1f9380..a8c15e3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -17,6 +17,7 @@  typedef struct BlockJob BlockJob;
 typedef struct BdrvChild BdrvChild;
 typedef struct BdrvChildRole BdrvChildRole;
 typedef struct BlockJobTxn BlockJobTxn;
+typedef struct BdrvNextIterator BdrvNextIterator;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
@@ -401,7 +402,7 @@  BlockDriverState *bdrv_lookup_bs(const char *device,
                                  Error **errp);
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BlockDriverState *bdrv_next(BlockDriverState *bs);
+BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs);
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
 int bdrv_is_encrypted(BlockDriverState *bs);
 int bdrv_key_required(BlockDriverState *bs);
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 44a222d..68d92b5 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -89,7 +89,6 @@  void blk_remove_all_bs(void);
 const char *blk_name(BlockBackend *blk);
 BlockBackend *blk_by_name(const char *name);
 BlockBackend *blk_next(BlockBackend *blk);
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs);
 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
 void monitor_remove_blk(BlockBackend *blk);
 
diff --git a/migration/block.c b/migration/block.c
index 1743317..a7a76a0 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -383,6 +383,7 @@  static void init_blk_migration(QEMUFile *f)
     BlockDriverState *bs;
     BlkMigDevState *bmds;
     int64_t sectors;
+    BdrvNextIterator *it = NULL;
 
     block_mig_state.submitted = 0;
     block_mig_state.read_done = 0;
@@ -392,7 +393,8 @@  static void init_blk_migration(QEMUFile *f)
     block_mig_state.bulk_completed = 0;
     block_mig_state.zero_blocks = migrate_zero_blocks();
 
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+
+    while ((it = bdrv_next(it, &bs))) {
         if (bdrv_is_read_only(bs)) {
             continue;
         }
diff --git a/monitor.c b/monitor.c
index d1c1930..2c87244 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3427,11 +3427,13 @@  void host_net_remove_completion(ReadLineState *rs, int nb_args, const char *str)
 static void vm_completion(ReadLineState *rs, const char *str)
 {
     size_t len;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
 
     len = strlen(str);
     readline_set_completion_index(rs, len);
-    while ((bs = bdrv_next(bs))) {
+
+    while ((it = bdrv_next(it, &bs))) {
         SnapshotInfoList *snapshots, *snapshot;
         AioContext *ctx = bdrv_get_aio_context(bs);
         bool ok = false;
diff --git a/qmp.c b/qmp.c
index e784a67..8f8ae3a 100644
--- a/qmp.c
+++ b/qmp.c
@@ -181,6 +181,7 @@  void qmp_cont(Error **errp)
     Error *local_err = NULL;
     BlockBackend *blk;
     BlockDriverState *bs;
+    BdrvNextIterator *it;
 
     /* if there is a dump in background, we should wait until the dump
      * finished */
@@ -199,7 +200,9 @@  void qmp_cont(Error **errp)
     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
         blk_iostatus_reset(blk);
     }
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+
+    it = NULL;
+    while ((it = bdrv_next(it, &bs))) {
         bdrv_add_key(bs, NULL, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);