[3/4] block, migration: add bdrv_flush_vmstate helper
diff mbox series

Message ID 20200611171143.21589-4-den@openvz.org
State New
Headers show
Series
  • block: seriously improve savevm performance
Related show

Commit Message

Denis V. Lunev June 11, 2020, 5:11 p.m. UTC
Right now bdrv_fclose() is just calling bdrv_flush().

The problem is that migration code is working inefficently from black
layer terms and are frequently called for very small pieces of not
properly aligned data. Block layer is capable to work this way, but
this is very slow.

This patch is a preparation for the introduction of the intermediate
buffer at block driver state. It would be beneficial to separate
conventional bdrv_flush() from closing QEMU file from migration code.

The patch also forces bdrv_flush_vmstate() operation inside
synchronous blk_save_vmstate() operation. This helper is used from
qemu-io only.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <fam@euphon.net>
CC: Juan Quintela <quintela@redhat.com>
CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
 block/block-backend.c |  6 +++++-
 block/io.c            | 39 +++++++++++++++++++++++++++++++++++++++
 include/block/block.h |  1 +
 migration/savevm.c    |  3 +++
 4 files changed, 48 insertions(+), 1 deletion(-)

Comments

Vladimir Sementsov-Ogievskiy June 15, 2020, 7:56 a.m. UTC | #1
11.06.2020 20:11, Denis V. Lunev wrote:
> Right now bdrv_fclose() is just calling bdrv_flush().
> 
> The problem is that migration code is working inefficently from black
> layer terms and are frequently called for very small pieces of not
> properly aligned data. Block layer is capable to work this way, but
> this is very slow.
> 
> This patch is a preparation for the introduction of the intermediate
> buffer at block driver state. It would be beneficial to separate
> conventional bdrv_flush() from closing QEMU file from migration code.
> 
> The patch also forces bdrv_flush_vmstate() operation inside
> synchronous blk_save_vmstate() operation. This helper is used from
> qemu-io only.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Max Reitz <mreitz@redhat.com>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Fam Zheng <fam@euphon.net>
> CC: Juan Quintela <quintela@redhat.com>
> CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> CC: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> CC: Denis Plotnikov <dplotnikov@virtuozzo.com>
> ---
>   block/block-backend.c |  6 +++++-
>   block/io.c            | 39 +++++++++++++++++++++++++++++++++++++++
>   include/block/block.h |  1 +
>   migration/savevm.c    |  3 +++
>   4 files changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 9342a475cb..2107ace699 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -2177,7 +2177,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
>   int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
>                        int64_t pos, int size)
>   {
> -    int ret;
> +    int ret, ret2;
>   
>       if (!blk_is_available(blk)) {
>           return -ENOMEDIUM;
> @@ -2187,6 +2187,10 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
>       if (ret < 0) {
>           return ret;
>       }
> +    ret2 = bdrv_flush_vmstate(blk_bs(blk));
> +    if (ret2 < 0) {
> +        return ret;
> +    }
>   
>       if (ret == size && !blk->enable_write_cache) {
>           ret = bdrv_flush(blk_bs(blk));
> diff --git a/block/io.c b/block/io.c
> index 121ce17a49..fbf352f39d 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2725,6 +2725,45 @@ int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
>       return bdrv_rw_vmstate(bs, qiov, pos, true);
>   }
>   
> +
> +typedef struct FlushVMStateCo {
> +    BlockDriverState *bs;
> +    int ret;
> +} FlushVMStateCo;
> +
> +static int coroutine_fn bdrv_co_flush_vmstate(BlockDriverState *bs)
> +{
> +    return 0;
> +}
> +
> +static void coroutine_fn bdrv_flush_vmstate_co_entry(void *opaque)
> +{
> +    FlushVMStateCo *rwco = opaque;
> +
> +    rwco->ret = bdrv_co_flush_vmstate(rwco->bs);
> +    aio_wait_kick();
> +}
> +
> +int bdrv_flush_vmstate(BlockDriverState *bs)
> +{
> +    Coroutine *co;
> +    FlushVMStateCo flush_co = {
> +        .bs = bs,
> +        .ret = NOT_DONE,
> +    };
> +
> +    if (qemu_in_coroutine()) {
> +        /* Fast-path if already in coroutine context */
> +        bdrv_flush_vmstate_co_entry(&flush_co);
> +    } else {
> +        co = qemu_coroutine_create(bdrv_flush_vmstate_co_entry, &flush_co);
> +        bdrv_coroutine_enter(bs, co);
> +        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
> +    }
> +
> +    return flush_co.ret;
> +}

In block/io.c, since 7d2410cea154bf, these coroutine wrappers are using bdrv_run_co() instead.
I hope, it's an intermediate state, and my series with auto-generated wrappers will be applied, still, now we should use bdrv_run_co()-based approach for consistency.

Otherwise, patch looks OK for me, just add a new interface, doing nothing for now. Still, would be good to add a comment in block.h, that bdrv_flush_vmsate is needed after bdrv_save_vmstate.

> +
>   /**************************************************************/
>   /* async I/Os */
>   
> diff --git a/include/block/block.h b/include/block/block.h
> index 25e299605e..024525b87d 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -572,6 +572,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
>   
>   int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
>                         int64_t pos, int size);
> +int bdrv_flush_vmstate(BlockDriverState *bs);
>   
>   void bdrv_img_create(const char *filename, const char *fmt,
>                        const char *base_filename, const char *base_fmt,
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 0ff5bb40ed..9698c909d7 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -150,6 +150,9 @@ static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
>   
>   static int bdrv_fclose(void *opaque, Error **errp)
>   {
> +    int err = bdrv_flush_vmstate(opaque);
> +    if (err < 0)
> +        return err;
>       return bdrv_flush(opaque);
>   }
>   
>

Patch
diff mbox series

diff --git a/block/block-backend.c b/block/block-backend.c
index 9342a475cb..2107ace699 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2177,7 +2177,7 @@  int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
-    int ret;
+    int ret, ret2;
 
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
@@ -2187,6 +2187,10 @@  int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
     if (ret < 0) {
         return ret;
     }
+    ret2 = bdrv_flush_vmstate(blk_bs(blk));
+    if (ret2 < 0) {
+        return ret;
+    }
 
     if (ret == size && !blk->enable_write_cache) {
         ret = bdrv_flush(blk_bs(blk));
diff --git a/block/io.c b/block/io.c
index 121ce17a49..fbf352f39d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2725,6 +2725,45 @@  int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
     return bdrv_rw_vmstate(bs, qiov, pos, true);
 }
 
+
+typedef struct FlushVMStateCo {
+    BlockDriverState *bs;
+    int ret;
+} FlushVMStateCo;
+
+static int coroutine_fn bdrv_co_flush_vmstate(BlockDriverState *bs)
+{
+    return 0;
+}
+
+static void coroutine_fn bdrv_flush_vmstate_co_entry(void *opaque)
+{
+    FlushVMStateCo *rwco = opaque;
+
+    rwco->ret = bdrv_co_flush_vmstate(rwco->bs);
+    aio_wait_kick();
+}
+
+int bdrv_flush_vmstate(BlockDriverState *bs)
+{
+    Coroutine *co;
+    FlushVMStateCo flush_co = {
+        .bs = bs,
+        .ret = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_flush_vmstate_co_entry(&flush_co);
+    } else {
+        co = qemu_coroutine_create(bdrv_flush_vmstate_co_entry, &flush_co);
+        bdrv_coroutine_enter(bs, co);
+        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
+    }
+
+    return flush_co.ret;
+}
+
 /**************************************************************/
 /* async I/Os */
 
diff --git a/include/block/block.h b/include/block/block.h
index 25e299605e..024525b87d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -572,6 +572,7 @@  int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                       int64_t pos, int size);
+int bdrv_flush_vmstate(BlockDriverState *bs);
 
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
diff --git a/migration/savevm.c b/migration/savevm.c
index 0ff5bb40ed..9698c909d7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -150,6 +150,9 @@  static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
 
 static int bdrv_fclose(void *opaque, Error **errp)
 {
+    int err = bdrv_flush_vmstate(opaque);
+    if (err < 0)
+        return err;
     return bdrv_flush(opaque);
 }