diff mbox

[v6,4/6] block: ignore flush requests when storage is clean

Message ID 1468499383-17840-5-git-send-email-den@openvz.org
State New
Headers show

Commit Message

Denis V. Lunev July 14, 2016, 12:29 p.m. UTC
From: Evgeny Yakovlev <eyakovlev@virtuozzo.com>

Some guests (win2008 server for example) do a lot of unnecessary
flushing when underlying media has not changed. This adds additional
overhead on host when calling fsync/fdatasync.

This change introduces a write generation scheme in BlockDriverState.
Current write generation is checked against last flushed generation to
avoid unnessesary flushes.

The problem with excessive flushing was found by a performance test
which does parallel directory tree creation (from 2 processes).
Results improved from 0.424 loops/sec to 0.432 loops/sec.
Each loop creates 10^3 directories with 10 files in each.

Signed-off-by: Evgeny Yakovlev <eyakovlev@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Max Reitz <mreitz@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Fam Zheng <famz@redhat.com>
CC: John Snow <jsnow@redhat.com>
---
 block.c                   |  3 +++
 block/io.c                | 21 +++++++++++++++++++++
 include/block/block_int.h |  5 +++++
 3 files changed, 29 insertions(+)
diff mbox

Patch

diff --git a/block.c b/block.c
index c2fb8bd..b88ad31 100644
--- a/block.c
+++ b/block.c
@@ -234,6 +234,8 @@  BlockDriverState *bdrv_new(void)
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
 
+    qemu_co_queue_init(&bs->flush_queue);
+
     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
 
     return bs;
@@ -2472,6 +2474,7 @@  int bdrv_truncate(BlockDriverState *bs, int64_t offset)
         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
         bdrv_dirty_bitmap_truncate(bs);
         bdrv_parent_cb_resize(bs);
+        ++bs->write_gen;
     }
     return ret;
 }
diff --git a/block/io.c b/block/io.c
index 7086908..f181ff7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1303,6 +1303,7 @@  static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     }
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
+    ++bs->write_gen;
     bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
 
     if (bs->wr_highest_offset < offset + bytes) {
@@ -2235,6 +2236,15 @@  int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
 
+    int current_gen = bs->write_gen;
+
+    /* Wait until any previous flushes are completed */
+    while (bs->flush_started_gen != bs->flushed_gen) {
+        qemu_co_queue_wait(&bs->flush_queue);
+    }
+
+    bs->flush_started_gen = current_gen;
+
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
         ret = bs->drv->bdrv_co_flush(bs);
@@ -2255,6 +2265,11 @@  int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto flush_parent;
     }
 
+    /* Check if we really need to flush anything */
+    if (bs->flushed_gen == current_gen) {
+        goto flush_parent;
+    }
+
     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
     if (bs->drv->bdrv_co_flush_to_disk) {
         ret = bs->drv->bdrv_co_flush_to_disk(bs);
@@ -2285,6 +2300,7 @@  int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
          */
         ret = 0;
     }
+
     if (ret < 0) {
         goto out;
     }
@@ -2295,6 +2311,10 @@  int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 flush_parent:
     ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 out:
+    /* Notify any pending flushes that we have completed */
+    bs->flushed_gen = current_gen;
+    qemu_co_queue_restart_all(&bs->flush_queue);
+
     tracked_request_end(&req);
     return ret;
 }
@@ -2420,6 +2440,7 @@  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
     }
     ret = 0;
 out:
+    ++bs->write_gen;
     bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                    req.bytes >> BDRV_SECTOR_BITS);
     tracked_request_end(&req);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 042c118..104d9ab 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -439,6 +439,11 @@  struct BlockDriverState {
     int copy_on_read; /* if nonzero, copy read backing sectors into image.
                          note this is a reference count */
 
+    CoQueue flush_queue;            /* Serializing flush queue */
+    unsigned int write_gen;         /* Current data generation */
+    unsigned int flush_started_gen; /* Generation for which flush has started */
+    unsigned int flushed_gen;       /* Flushed write generation */
+
     BlockDriver *drv; /* NULL means no media */
     void *opaque;