diff mbox

[v2,09/16] block: wait for all pending I/O when doing synchronous requests

Message ID 1458137817-15383-10-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini March 16, 2016, 2:16 p.m. UTC
Synchronous I/O should in general happen either in the main thread (e.g.
for bdrv_open and bdrv_create) or between bdrv_drained_begin and
bdrv_drained_end.  Therefore, the simplest way to wait for it to finish
is to wait for _all_ pending I/O to complete.

In fact, there was one case in bdrv_close where we explicitly drained
after bdrv_flush; this is now unnecessary.  And we should probably have
called bdrv_drain_all after calls to bdrv_flush_all, which is now
unnecessary too.

This decouples synchronous I/O from aio_poll.  When the request used
not to be tracked as part of bdrv_drain (e.g. bdrv_co_get_block_status)
we need to update the in_flight count.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: remove unnecessary qed.c hunk [Fam]

 block.c                    |  1 -
 block/io.c                 | 39 ++++++++++++---------------------------
 block/qed-table.c          | 16 ++++------------
 tests/qemu-iotests/060     |  8 ++++++--
 tests/qemu-iotests/060.out |  4 +++-
 5 files changed, 25 insertions(+), 43 deletions(-)

Comments

Fam Zheng March 17, 2016, 2:55 a.m. UTC | #1
On Wed, 03/16 15:16, Paolo Bonzini wrote:
> Synchronous I/O should in general happen either in the main thread (e.g.
> for bdrv_open and bdrv_create) or between bdrv_drained_begin and
> bdrv_drained_end.  Therefore, the simplest way to wait for it to finish
> is to wait for _all_ pending I/O to complete.
> 
> In fact, there was one case in bdrv_close where we explicitly drained
> after bdrv_flush; this is now unnecessary.  And we should probably have
> called bdrv_drain_all after calls to bdrv_flush_all, which is now
> unnecessary too.
> 
> This decouples synchronous I/O from aio_poll.  When the request used
> not to be tracked as part of bdrv_drain (e.g. bdrv_co_get_block_status)
> we need to update the in_flight count.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Fam Zheng <famz@redhat.com>
diff mbox

Patch

diff --git a/block.c b/block.c
index bbd7de6..47f2367 100644
--- a/block.c
+++ b/block.c
@@ -2142,7 +2142,6 @@  static void bdrv_close(BlockDriverState *bs)
 
     bdrv_drained_begin(bs); /* complete I/O */
     bdrv_flush(bs);
-    bdrv_drain(bs); /* in case flush left pending I/O */
 
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
diff --git a/block/io.c b/block/io.c
index db975ab..0a99131 100644
--- a/block/io.c
+++ b/block/io.c
@@ -583,13 +583,9 @@  static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
         /* Fast-path if already in coroutine context */
         bdrv_rw_co_entry(&rwco);
     } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
         co = qemu_coroutine_create(bdrv_rw_co_entry);
         qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        bdrv_drain(bs);
     }
 
     bdrv_no_throttling_end(bs);
@@ -1535,17 +1531,19 @@  static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
     }
 
     *file = NULL;
+    bdrv_inc_in_flight(bs);
     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
                                             file);
     if (ret < 0) {
         *pnum = 0;
-        return ret;
+        goto out;
     }
 
     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
-                                     *pnum, pnum, file);
+        ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+                                    *pnum, pnum, file);
+        goto out;
     }
 
     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
@@ -1587,6 +1585,8 @@  static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         }
     }
 
+out:
+    bdrv_dec_in_flight(bs);
     return ret;
 }
 
@@ -1652,13 +1652,9 @@  int64_t bdrv_get_block_status_above(BlockDriverState *bs,
         /* Fast-path if already in coroutine context */
         bdrv_get_block_status_above_co_entry(&data);
     } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
         co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
         qemu_coroutine_enter(co, &data);
-        while (!data.done) {
-            aio_poll(aio_context, true);
-        }
+        bdrv_drain(bs);
     }
     return data.ret;
 }
@@ -2459,13 +2455,9 @@  int bdrv_flush(BlockDriverState *bs)
         /* Fast-path if already in coroutine context */
         bdrv_flush_co_entry(&rwco);
     } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
         co = qemu_coroutine_create(bdrv_flush_co_entry);
         qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        bdrv_drain(bs);
     }
 
     return rwco.ret;
@@ -2582,13 +2574,9 @@  int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
         /* Fast-path if already in coroutine context */
         bdrv_discard_co_entry(&rwco);
     } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
         co = qemu_coroutine_create(bdrv_discard_co_entry);
         qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        bdrv_drain(bs);
     }
 
     return rwco.ret;
@@ -2663,11 +2651,8 @@  int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         bdrv_co_ioctl_entry(&data);
     } else {
         Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
-
         qemu_coroutine_enter(co, &data);
-        while (data.ret == -EINPROGRESS) {
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
+        bdrv_drain(bs);
     }
     return data.ret;
 }
diff --git a/block/qed-table.c b/block/qed-table.c
index 802945f..3a8566f 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -173,9 +173,7 @@  int qed_read_l1_table_sync(BDRVQEDState *s)
 
     qed_read_table(s, s->header.l1_table_offset,
                    s->l1_table, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    bdrv_drain(s->bs);
 
     return ret;
 }
@@ -194,9 +192,7 @@  int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
     int ret = -EINPROGRESS;
 
     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    bdrv_drain(s->bs);
 
     return ret;
 }
@@ -267,9 +263,7 @@  int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
     int ret = -EINPROGRESS;
 
     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    bdrv_drain(s->bs);
 
     return ret;
 }
@@ -289,9 +283,7 @@  int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
     int ret = -EINPROGRESS;
 
     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    bdrv_drain(s->bs);
 
     return ret;
 }
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index c81319c..dffe35a 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -164,8 +164,12 @@  echo "open -o file.driver=blkdebug $TEST_IMG
 break cow_read 0
 aio_write 0k 1k
 wait_break 0
-write 64k 64k
-resume 0" | $QEMU_IO | _filter_qemu_io
+break pwritev_done 1
+aio_write 64k 64k
+wait_break 1
+resume 1
+resume 0
+aio_flush" | $QEMU_IO | _filter_qemu_io
 
 echo
 echo "=== Testing unallocated image header ==="
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index 5d40206..a0a9a11 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -105,7 +105,9 @@  discard 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L2 table); further corruption events will be suppressed
 blkdebug: Suspended request '0'
-write failed: Input/output error
+blkdebug: Suspended request '1'
+blkdebug: Resuming request '1'
+aio_write failed: Input/output error
 blkdebug: Resuming request '0'
 aio_write failed: No medium found