
[10/11] graph-lock: Unlock the AioContext while polling

Message ID 20230531110231.262251-11-kwolf@redhat.com
State New, archived
Series block: Re-enable the graph lock

Commit Message

Kevin Wolf May 31, 2023, 11:02 a.m. UTC
If the caller keeps the AioContext lock for a block node in an iothread,
polling in bdrv_graph_wrlock() deadlocks if the condition isn't
fulfilled immediately.

Now that all callers make sure to actually have the AioContext locked
when they call bdrv_replace_child_noperm() like they should, we can
change bdrv_graph_wrlock() to take a BlockDriverState whose AioContext
lock the caller holds (NULL if it doesn't) and unlock it temporarily
while polling.
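
To illustrate the calling convention this establishes (a sketch only: example_graph_change() is a made-up caller for illustration, while the AioContext lock functions and bdrv_graph_wrlock()/bdrv_graph_wrunlock() are the real APIs touched by this series):

    /*
     * Illustrative only, not part of this patch: a main-loop caller that
     * holds the AioContext lock of a node assumed to run in an iothread.
     */
    static void example_graph_change(BlockDriverState *bs)
    {
        AioContext *ctx = bdrv_get_aio_context(bs);   /* iothread context */

        aio_context_acquire(ctx);

        /*
         * Passing bs lets bdrv_graph_wrlock() drop ctx while it polls for
         * in-flight readers, so requests in the iothread can still complete
         * and release the reader lock; ctx is reacquired before it returns.
         */
        bdrv_graph_wrlock(bs);
        /* ... modify the graph: attach/detach children, replace nodes ... */
        bdrv_graph_wrunlock();

        aio_context_release(ctx);
    }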

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/graph-lock.h |  6 ++++--
 block.c                    |  4 ++--
 block/graph-lock.c         | 23 ++++++++++++++++++++++-
 3 files changed, 28 insertions(+), 5 deletions(-)

Patch

diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index 7574a2de5b..7e04f98ff0 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -111,10 +111,12 @@  void unregister_aiocontext(AioContext *ctx);
  * The wrlock can only be taken from the main loop, with BQL held, as only the
  * main loop is allowed to modify the graph.
  *
+ * If @bs is non-NULL, its AioContext is temporarily released.
+ *
  * This function polls. Callers must not hold the lock of any AioContext other
- * than the current one.
+ * than the current one and the one of @bs.
  */
-void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
+void bdrv_graph_wrlock(BlockDriverState *bs) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
 
 /*
  * bdrv_graph_wrunlock:
diff --git a/block.c b/block.c
index d5f8231f31..f94cee8930 100644
--- a/block.c
+++ b/block.c
@@ -2854,7 +2854,7 @@  uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
  * Replaces the node that a BdrvChild points to without updating permissions.
  *
  * If @new_bs is non-NULL, the parent of @child must already be drained through
- * @child.
+ * @child and the caller must hold the AioContext lock for @new_bs.
  */
 static void bdrv_replace_child_noperm(BdrvChild *child,
                                       BlockDriverState *new_bs)
@@ -2893,7 +2893,7 @@  static void bdrv_replace_child_noperm(BdrvChild *child,
     }
 
     /* TODO Pull this up into the callers to avoid polling here */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(new_bs);
     if (old_bs) {
         if (child->klass->detach) {
             child->klass->detach(child);
diff --git a/block/graph-lock.c b/block/graph-lock.c
index a92c6ae219..3bf2591dc4 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -110,8 +110,10 @@  static uint32_t reader_count(void)
 }
 #endif
 
-void bdrv_graph_wrlock(void)
+void bdrv_graph_wrlock(BlockDriverState *bs)
 {
+    AioContext *ctx = NULL;
+
     GLOBAL_STATE_CODE();
     /*
      * TODO Some callers hold an AioContext lock when this is called, which
@@ -120,7 +122,22 @@  void bdrv_graph_wrlock(void)
      */
 #if 0
     assert(!qatomic_read(&has_writer));
+#endif
+
+    /*
+     * Release only non-mainloop AioContext. The mainloop often relies on the
+     * BQL and doesn't lock the main AioContext before doing things.
+     */
+    if (bs) {
+        ctx = bdrv_get_aio_context(bs);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_release(ctx);
+        } else {
+            ctx = NULL;
+        }
+    }
 
+#if 0
     /* Make sure that constantly arriving new I/O doesn't cause starvation */
     bdrv_drain_all_begin_nopoll();
 
@@ -150,6 +167,10 @@  void bdrv_graph_wrlock(void)
 
     bdrv_drain_all_end();
 #endif
+
+    if (ctx) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
 }
 
 void bdrv_graph_wrunlock(void)
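
To make the "release only non-mainloop AioContext" comment above concrete, here is a standalone sketch of the same check (context_to_release() is a hypothetical name used only for illustration; the patch implements this logic inline in bdrv_graph_wrlock()):

    #include "block/block.h"        /* bdrv_get_aio_context() */
    #include "qemu/main-loop.h"     /* qemu_get_aio_context() */

    /*
     * Hypothetical helper, illustration only: return the AioContext that
     * should be dropped around the poll, or NULL if nothing needs releasing.
     * The main context is skipped because main-loop code is covered by the
     * BQL and often has not taken the main AioContext lock at all, so
     * releasing it here could unlock a lock that isn't held.
     */
    static AioContext *context_to_release(BlockDriverState *bs)
    {
        AioContext *ctx;

        if (!bs) {
            return NULL;
        }

        ctx = bdrv_get_aio_context(bs);
        return ctx == qemu_get_aio_context() ? NULL : ctx;
    }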